Mucosal microbiota alterations in primary sclerosis cholangitis persist after liver transplantation and are associated with clinical features independently of geography

3. Hypothesis: PSC disease is associated with IBD


1 Introduction

1.1 About

An IBD vs. no-IBD comparison was performed within PSC patients (pre-LTx and rPSC individuals combined) to discover IBD-specific pattern.

IBD vs no_IBD analysis on merged data

  • Alpha diversity –> group effect, country effect, interaction effect

  • Beta diversity – PERMANOVA, PCA -> group effect, country effect, interaction effect

  • DAA ->

    • o Group effect – linDA + MaAsLin2 intersection

    • o Country effect – – taxa with significant interaction effect were excluded based on individual post-hoc analysis. Taxa had to have the same direction in both countries

IBD vs no_IBD – finding IBD-specific pattern in PSC patients

Importing libraries and custom functions built for this analysis

source("custom_functions.R")

1.2 Data Import

Importing ASV, taxa and metadata tables for both Czech and Norway samples.

Czech

MICROBIOME

path = "../../data/analysis_ready_data/ikem/"
asv_tab_ikem <- as.data.frame(fread(file.path(path,"asv_table_ikem.csv"),
                                    check.names = FALSE))
taxa_tab_ikem <- as.data.frame(fread(file.path(path,"taxa_table_ikem.csv"),
                                     check.names = FALSE))
metadata_ikem <- as.data.frame(fread(file.path(path,"metadata_ikem.csv"),
                                     check.names = FALSE))

IBD info

path = "../../data/clinical_data/"
cz <- read.csv(file.path(path,"clinical_metadata_cz.csv")) %>% dplyr::select(SampleID,PatientID,PSC_IBD,Group)

cz$PatientID <- as.character(cz$PatientID)
metadata_ikem <- merge(metadata_ikem,cz,by=c("SampleID", "Group"),all=TRUE)

Norway

MICROBIOME

path = "../../data/analysis_ready_data/norway/"
asv_tab_norway <- as.data.frame(fread(file.path(path,"asv_table_norway.csv"),
                                    check.names = FALSE))
taxa_tab_norway <- as.data.frame(fread(file.path(path,"taxa_table_norway.csv"),
                                    check.names = FALSE))
metadata_norway <- as.data.frame(fread(file.path(path,"metadata_norway.csv"),
                                    check.names = FALSE))

IBD info

path = "../../data/clinical_data/"
no <- read.csv(file.path(path,"clinical_metadata_no.csv")) %>% dplyr::select(SampleID,subjectid,IBD, Group)

1.2.1 Merging

Merging clinical metadata

clinical_metadata <- bind_rows(
  cz %>% dplyr::mutate(PSC_IBD = case_when(
    PSC_IBD == "0" ~ "no_ibd",
    PSC_IBD == "1" ~ "ibd",
  )) %>% `colnames<-`(c("SampleID","PatientID","IBD","Group")),
  no %>% `colnames<-`(c("SampleID","PatientID","IBD","Group")) %>%
    mutate(PatientID=paste0("NO_",PatientID)))

Terminal ileum

ileum_data <- merging_data(asv_tab_1=asv_tab_ikem,
                           asv_tab_2=asv_tab_norway,
                           taxa_tab_1=taxa_tab_ikem,
                           taxa_tab_2=taxa_tab_norway,
                           metadata_1=metadata_ikem,
                           metadata_2=metadata_norway,
                           segment="TI",Q="Q3")
Removing 1498 ASV(s)
Removing 1834 ASV(s)
Merging at ASV level
Finding inconsistencies in taxonomy, trying to keep the ones that have better taxonomy assignment
Removing 979 ASV(s)
ileum_asv_tab <- ileum_data[[1]]
ileum_taxa_tab <- ileum_data[[2]]
ileum_metadata <- ileum_data[[3]]

ileum_metadata <- merge(ileum_metadata,
                        clinical_metadata %>%
                          dplyr::select(-Group),
                        by=c("SampleID"), all.x = TRUE) %>%
  dplyr::select(-PatientID,Group) %>% 
  dplyr::mutate(Group=IBD) %>% dplyr::select(-IBD)

Colon

colon_data <- merging_data(asv_tab_1=asv_tab_ikem,
                           asv_tab_2=asv_tab_norway,
                           taxa_tab_1=taxa_tab_ikem,
                           taxa_tab_2=taxa_tab_norway,
                           metadata_1=metadata_ikem,
                           metadata_2=metadata_norway,
                           segment="colon",Q="Q3")
Removing 739 ASV(s)
Removing 266 ASV(s)
Merging at ASV level
Finding inconsistencies in taxonomy, trying to keep the ones that have better taxonomy assignment
Removing 1157 ASV(s)
colon_asv_tab <- colon_data[[1]]
colon_taxa_tab <- colon_data[[2]]
colon_metadata <- colon_data[[3]]

colon_metadata <- merge(colon_metadata,
                        clinical_metadata %>%
                          dplyr::select(-Group),
                        by=c("SampleID"), all.x = TRUE) %>%
  dplyr::select(-PatientID,Group) %>% 
  dplyr::mutate(Group=IBD) %>% dplyr::select(-IBD)

1.3 Import clinical data

1.3.1 CZ

# clinical metadata
metadata_clinical <- read.csv("../../data/clinical_data/clinical_metadata_cz.csv")
metadata_clinical$PatientID <- as.character(metadata_clinical$PatientID)

# DYSBIOSIS
metadata_dysbiosis <- read.csv("../../data/clinical_data/dysbiosis_metadata.csv") %>%
  dplyr::filter(Country=="CZ")

# ALPHA DIVERSITY
metadata_alpha_ileum <- read.csv(
  "../results/Q1/alpha_diversity/alpha_indices_terminal_ileum.csv") %>%
  dplyr::filter(Country=="CZ")

metadata_alpha_colon <- read.csv(
  "../results/Q1/alpha_diversity/alpha_indices_colon.csv") %>%
  dplyr::filter(Country=="CZ")

metadata_alpha <- rbind(metadata_alpha_ileum,metadata_alpha_colon) %>%
  mutate(PatientID=Patient) %>%
  dplyr::select(-c(Patient, Group))

# MERGING
metadata_cz <- full_join(metadata_clinical, metadata_dysbiosis, by=c("SampleID","Matrix","PatientID","Group","Country"))

metadata_cz <- full_join(metadata_cz, metadata_alpha, by=c("SampleID","PatientID","Country"))

metadata_cz$Group <- factor(metadata_cz$Group,levels = c("healthy","pre_ltx","non-rPSC","rPSC"))

1.3.2 NO

# clinical metadata
metadata_clinical <- read.csv("../../data/clinical_data/clinical_metadata_no.csv")
metadata_clinical %<>% mutate(PatientID=subjectid,
                              Matrix=segment) %>%
  dplyr::select(-subjectid,-segment)
metadata_clinical$PatientID <- as.character(paste0("NO_",metadata_clinical$PatientID))

# DYSBIOSIS
metadata_dysbiosis <- read.csv("../../data/clinical_data/dysbiosis_metadata.csv") %>%
  dplyr::filter(Country=="NO")  %>%
  dplyr::select(-c(Patient))

# ALPHA DIVERSITY
metadata_alpha_ileum <- read.csv(
  "../results/Q1/alpha_diversity/alpha_indices_terminal_ileum.csv") %>%
  dplyr::filter(Country=="NO")

metadata_alpha_colon <- read.csv(
  "../results/Q1/alpha_diversity/alpha_indices_colon.csv") %>%
  dplyr::filter(Country=="NO")

metadata_alpha <- rbind(metadata_alpha_ileum,metadata_alpha_colon) %>%
  mutate(PatientID=Patient) %>%
  dplyr::select(-c(Patient, Group))

# MERGING
metadata_no <- full_join(metadata_clinical, metadata_dysbiosis, by=c("SampleID","Matrix","PatientID","Group","Country"))

metadata_no <- full_join(metadata_no, metadata_alpha, by=c("SampleID","PatientID","Country"))

metadata_no$Group <- factor(metadata_no$Group,levels = c("healthy","pre_ltx","non-rPSC","rPSC"))

1.3.3 Merging metadata

metadata_cz %<>% dplyr::mutate(Calprotectin=Fecal.calprotectin,
                             AOM=AOM_score,
                             APRI=APRI_score,
                             FIB4=FIB4_score) %>%
  dplyr::select(SampleID,Matrix,PatientID,Group,Country,Bilirubin,ALP,Calprotectin,
                MAYO_PSC,AOM,APRI,FIB4,Platelets,AST,Creatinine,
                Albumin,ALT,PSC_IBD,GGT,INR,Albumin,Nancy_max,eMAYO,MAYO_dai,
                dys_unfiltered_asv,dys_unfiltered_genus,
                dys_filtered_asv,dys_filtered_genus,
                Observe,Shannon,Simpson,Pielou)

metadata_no %<>% dplyr::mutate(Platelets=TRC,
                             Creatinine=Kreatinin,
                             MAYO_PSC=Mayo_score,
                             AST=ASAT/60,
                             ALT=ALAT/60,
                             PSC_IBD=IBD,
                             Bilirubin=Bilirubin*17.1,
                             ALP=ALP/60,
                             Albumin=Albumin*10) %>%
  dplyr::mutate(PSC_IBD = case_when(
      PSC_IBD == "no_ibd" ~ "0",
      PSC_IBD == "ibd" ~ "1",
      TRUE ~ Group  # Keep other values as is
    )) %>%
  dplyr::select(SampleID,Matrix,PatientID,Group,Country,Bilirubin,ALP,Calprotectin,
                MAYO_PSC,AOM,APRI,FIB4,Platelets,AST,Creatinine,
                Albumin,ALT,PSC_IBD,
                dys_unfiltered_asv,dys_unfiltered_genus,
                dys_filtered_asv,dys_filtered_genus,
                Observe,Shannon,Simpson,Pielou) 

metadata_final <- merge(metadata_cz,metadata_no,all = TRUE)
metadata_final$Calprotectin[metadata_final$Calprotectin==">6000"] <- 6000
metadata_final$Calprotectin <- as.numeric(metadata_final$Calprotectin)

1.4 CHI SQUARE TEST

1.4.1 pre_LTx, rPSC, non-rPSC

cz <- cz %>% dplyr::group_by(PSC_IBD,Group) %>%
  distinct(PatientID, .keep_all = TRUE) %>%
  count() %>% drop_na() %>% 
  dplyr::mutate(PSC_IBD = case_when(
    PSC_IBD == "0" ~ "no_ibd",
    PSC_IBD == "1" ~ "ibd",
  )) %>% `colnames<-`(c("IBD","Group","n")) 

no <- no %>% dplyr::group_by(IBD,Group) %>%
  distinct(subjectid, .keep_all = TRUE) %>%
  count() %>% drop_na()

# Summarize by IBD and Group
data <- bind_rows(cz, no) %>%
  group_by(IBD, Group) %>%
  summarise(n = sum(n), .groups = "drop") %>%
  pivot_wider(names_from = Group, values_from = n) %>%
  column_to_rownames("IBD") %>%
  as.matrix()
    
chi_res <- chisq.test(data)
chi_res$expected
        non-rPSC  pre_ltx      rPSC
ibd    101.12774 96.11314 31.759124
no_ibd  19.87226 18.88686  6.240876
chi_res

    Pearson's Chi-squared test

data:  data
X-squared = 3.881, df = 2, p-value = 0.1436

1.4.2 pre_LTx + rPSC (PSC patients), non-rPSC

data2 <- data %>% as.data.frame() %>%
  dplyr::mutate(PSC=pre_ltx+rPSC) %>% 
  dplyr::select(PSC,`non-rPSC`) %>% as.matrix()

chi_res <- chisq.test(data2)
chi_res$expected
             PSC  non-rPSC
ibd    127.87226 101.12774
no_ibd  25.12774  19.87226
chi_res

    Pearson's Chi-squared test with Yates' continuity correction

data:  data2
X-squared = 0.20305, df = 1, p-value = 0.6523

1.4.3 rPSC, non-rPSC

data3 <- data %>% as.data.frame() %>%
  dplyr::select(rPSC,`non-rPSC`) %>% as.matrix()

chi_res <- chisq.test(data3)
chi_res$expected
            rPSC  non-rPSC
ibd    32.981132 105.01887
no_ibd  5.018868  15.98113
chi_res

    Pearson's Chi-squared test with Yates' continuity correction

data:  data3
X-squared = 0.69593, df = 1, p-value = 0.4042

2 Data Analysis - Terminal ileum

segment="terminal_ileum"

2.1 Filtering

Rules:

  • sequencing depth > 10000

  • nearZeroVar() with default settings

Rarefaction Curve

path="../intermediate_files/rarecurves"
seq_depth_threshold <- 10000
ps <- construct_phyloseq(ileum_asv_tab,ileum_taxa_tab,ileum_metadata)
rareres <- get_rarecurve(obj=ps, chunks=500)
save(rareres,file = file.path(path,"rarefaction_ileum.Rdata"))
load(file.path(path,"rarefaction_ileum.Rdata"))

prare <- ggrarecurve(obj=rareres,
                      factorNames="Country",
                      indexNames=c("Observe")) + 
  theme_bw() +
  theme(axis.text=element_text(size=8), panel.grid=element_blank(),
        strip.background = element_rect(colour=NA,fill="grey"),
        strip.text.x = element_text(face="bold")) + 
  geom_vline(xintercept = seq_depth_threshold, 
             linetype="dashed", 
             color = "red") + 
  xlim(0, 20000)
The color has been set automatically, you can reset it manually by adding scale_color_manual(values=yourcolors)
prare

Library size

read_counts(ileum_asv_tab, line = c(5000,10000))
Scale for y is already present.
Adding another scale for y, which will replace the existing scale.

2.1.1 Sequencing depth

data_filt <- seq_depth_filtering(ileum_asv_tab,
                                 ileum_taxa_tab,
                                 ileum_metadata,
                                 seq_depth_threshold = 10000)
Removing 68 ASV(s)
filt_ileum_asv_tab <- data_filt[[1]]; alpha_ileum_asv_tab <- filt_ileum_asv_tab
filt_ileum_taxa_tab <- data_filt[[2]]; alpha_ileum_taxa_tab <- filt_ileum_taxa_tab
filt_ileum_metadata <- data_filt[[3]]; alpha_ileum_metadata <- filt_ileum_metadata

seq_step <- dim(filt_ileum_asv_tab)[1]

Library size

read_counts(filt_ileum_asv_tab,line = c(5000,10000))
Scale for y is already present.
Adding another scale for y, which will replace the existing scale.

2.1.2 NearZeroVar

data_filt <- nearzerovar_filtering(filt_ileum_asv_tab, 
                                   filt_ileum_taxa_tab,
                                   filt_ileum_metadata)

filt_ileum_asv_tab <- data_filt[[1]]
filt_ileum_taxa_tab <- data_filt[[2]]
nearzero_step <- dim(filt_ileum_asv_tab)[1]

Library size

read_counts(filt_ileum_asv_tab,line = c(5000,10000))
Scale for y is already present.
Adding another scale for y, which will replace the existing scale.

2.1.3 Final Counts

final_counts_filtering(ileum_asv_tab,
                       filt_ileum_asv_tab,
                       filt_ileum_metadata,
                       seq_step, 0, nearzero_step) %>% `colnames<-`("Count")
                            Count
Raw data: ASVs               4153
Raw data: Samples             214
Sequencing depth filt: ASVs  4085
Prevalence filt: ASVs           0
NearZeroVar filt: ASVs        449
Filt data: ASVs               449
Filt data: Samples            208
Filt data: Patients           208
Filt data: Patients.1           0
Filtered samples                6
Matrices                       TI
healthy                         0
non-rPSC                        0
rPSC                            0
pre_ltx                         0
post_ltx                        0
ETOH                            0

2.2 Alpha diversity

path = "../results/Q3/alpha_diversity"

Calculation

# Construct MPSE object
alpha_ileum_metadata$Sample <- alpha_ileum_metadata$SampleID
ileum_mpse <- as.MPSE(construct_phyloseq(alpha_ileum_asv_tab,
                                         alpha_ileum_taxa_tab,
                                         alpha_ileum_metadata))

ileum_mpse %<>% mp_rrarefy(raresize = 10000,seed = 123)

# Calculate alpha diversity - rarefied counts
ileum_mpse %<>% mp_cal_alpha(.abundance=RareAbundance, force=TRUE)
alpha_div_plots <- list()

# preparing data frame
alpha_data <- data.frame(SampleID=ileum_mpse$Sample.x,
                         Observe=ileum_mpse$Observe,
                         Shannon=ileum_mpse$Shannon,
                         Simpson=ileum_mpse$Simpson,
                         Pielou=ileum_mpse$Pielou,
                         Group=ileum_mpse$Group,
                         Country=ileum_mpse$Country,
                         Patient=ileum_mpse$Patient)

write.csv(alpha_data,file.path(path,paste0("alpha_indices_",segment,".csv")),
          row.names = FALSE)

2.2.1 Plots

p_boxplot_alpha <- alpha_diversity_countries(alpha_data,show_legend = FALSE)
Using SampleID, Group, Country, Patient as id variables
Warning: The `size` argument of `element_line()` is deprecated as of ggplot2 3.4.0.
ℹ Please use the `linewidth` argument instead.
Scale for y is already present.
Adding another scale for y, which will replace the existing scale.
Using SampleID, Group, Country, Patient as id variables
Scale for y is already present.
Adding another scale for y, which will replace the existing scale.
# save the results
alpha_div_plots[[paste(segment,"Country")]] <- p_boxplot_alpha

# see the results
p_boxplot_alpha

2.2.2 Linear Model

path = "../results/Q3/alpha_diversity"
alpha_data <- read.csv(file.path(path,paste0("alpha_indices_",segment,".csv")))

Richness

results_model <- pairwise.lm(formula = "Observe ~ Group * Country",
                             factors=alpha_data$Group,
                             data=alpha_data)

# check interaction
if (!is.data.frame(results_model)){
  results_model_observe <- results_model[[1]]
  results_model_observe_emeans <- results_model[[2]]
} else {
  results_model_observe <- results_model
  results_model_observe_emeans <- NA
}

# save the results
pc_observed <- list(); 
pc_observed[[segment]] <- results_model_observe
# see the results
knitr::kable(results_model_observe,digits = 3,
caption = "Raw results of linear model of richness estimation.")
Raw results of linear model of richness estimation.
Estimate Std. Error t value Pr(>|t|)
no_ibd vs Groupibd -5.135 13.890 -0.370 0.712
no_ibd , ibd - CZ vs NO 0.000 17.652 0.000 1.000
no_ibd vs Groupibd:CountryNO -11.094 19.350 -0.573 0.567
knitr::kable(results_model_observe_emeans,digits = 3,
caption = "Raw results of independent country analysis")

Table: Raw results of independent country analysis

Shannon

results_model <- pairwise.lm(formula = "Shannon ~ Group * Country",
                             factors=alpha_data$Group,
                             data=alpha_data)

# check interaction
if (!is.data.frame(results_model)){
  results_model_shannon <- results_model[[1]]
  results_model_shannon_emeans <- results_model[[2]]
} else {
  results_model_shannon <- results_model
  results_model_shannon_emeans <- NA
}

# save the results
pc_shannon <- list(); 
pc_shannon[[segment]] <- as.data.frame(results_model_shannon)
# see the results
knitr::kable(results_model_shannon,digits = 3,
caption = "Raw results of linear model of Shannon estimation.")
Raw results of linear model of Shannon estimation.
Estimate Std. Error t value Pr(>|t|)
no_ibd vs Groupibd 0.034 0.182 0.187 0.852
no_ibd , ibd - CZ vs NO -0.070 0.231 -0.302 0.763
no_ibd vs Groupibd:CountryNO -0.171 0.254 -0.676 0.500
knitr::kable(results_model_shannon_emeans,digits = 3,
caption = "Raw results of independent country analysis")

Table: Raw results of independent country analysis

Simpson

results_model <- pairwise.lm(formula = "Simpson ~ Group * Country",
                                     factors=alpha_data$Group,
                                     data=alpha_data)

# check interaction
if (!is.data.frame(results_model)){
  results_model_simpson <- results_model[[1]]
  results_model_simpson_emeans <- results_model[[2]]
} else {
  results_model_simpson <- results_model
  results_model_simpson_emeans <- NA
}


# save the results
pc_simpson <- list(); 
pc_simpson[[segment]] <- as.data.frame(results_model_simpson)
# see the results
knitr::kable(results_model_simpson,digits = 3,
caption = "Raw results of linear model of Simpson estimation.")
Raw results of linear model of Simpson estimation.
Estimate Std. Error t value Pr(>|t|)
no_ibd vs Groupibd 0.010 0.026 0.401 0.689
no_ibd , ibd - CZ vs NO 0.004 0.033 0.125 0.900
no_ibd vs Groupibd:CountryNO -0.031 0.036 -0.853 0.395
knitr::kable(results_model_simpson_emeans,digits = 3,
caption = "Raw results of independent country analysis")

Table: Raw results of independent country analysis

Pielou

results_model <- pairwise.lm(formula = "Pielou ~ Group * Country",
                                     factors=alpha_data$Group,
                                     data=alpha_data)

# check interaction

if (!is.data.frame(results_model)){
  results_model_pielou <- results_model[[1]]
  results_model_pielou_emeans <- results_model[[2]]
} else {
  results_model_pielou <- results_model
  results_model_pielou_emeans <- NA
}

# save the results
pc_pielou <- list(); 
pc_pielou[[segment]] <- as.data.frame(results_model_pielou)
# see the results
knitr::kable(results_model_pielou,digits = 3,
caption = "Raw results of linear model of Pielou estimation.")
Raw results of linear model of Pielou estimation.
Estimate Std. Error t value Pr(>|t|)
no_ibd vs Groupibd 0.016 0.027 0.593 0.554
no_ibd , ibd - CZ vs NO -0.014 0.034 -0.419 0.676
no_ibd vs Groupibd:CountryNO -0.025 0.037 -0.674 0.501
knitr::kable(results_model_pielou_emeans,digits = 3,
caption = "Raw results of independent country analysis")

Table: Raw results of independent country analysis

2.2.3 Saving results

alpha_list <- list(
  Richness=pc_observed[[segment]],
  Shannon=pc_shannon[[segment]])
                   
write.xlsx(alpha_list, 
           file = file.path(path,paste0("alpha_diversity_results_",segment,".xlsx")))

2.3 Beta diversity

Calculating Aitchison distance (euclidean distance on clr-transformed data), both at ASV and genus level.

2.3.1 Main analysis

Genus level, Aitchison distance

level="genus"
path = "../results/Q3/beta_diversity"
pairwise_aitchison_raw <- list()
pca_plots_list <- list()

Aggregation, filtering

# Aggregation
genus_data <- aggregate_taxa(ileum_asv_tab,
                             ileum_taxa_tab,
                             taxonomic_level=level,
                             names=TRUE)
# Filtration
filt_data <- filtering_steps(genus_data[[1]],
                             genus_data[[2]],
                             ileum_metadata,
                             seq_depth_threshold=10000)
Removing 6 ASV(s)
filt_ileum_genus_tab <- filt_data[[1]]
filt_ileum_genus_taxa <- filt_data[[2]]
filt_ileum_metadata <- filt_data[[3]]
2.3.1.0.1 PERMANOVA
pairwise_df <- filt_ileum_genus_tab %>% column_to_rownames("SeqID") %>% t()

# main effect
pp_main <- pairwise.adonis(pairwise_df,
                           filt_ileum_metadata$Group,
                           covariate = filt_ileum_metadata$Country, sim.method = "robust.aitchison", p.adjust.m="BH")

# interaction
pp_int <- pairwise.adonis(pairwise_df,filt_ileum_metadata$Group,covariate = filt_ileum_metadata$Country, interaction = TRUE, sim.method = "robust.aitchison", p.adjust.m="BH")

# tidy the results
pp_factor <- pp_main[[1]]
pp_cov <- pp_main[[2]]
pp_fac.cov <- pp_int[[3]]

cols <- c("pairs","Df","SumsOfSqs", "F.Model","R2","p.value", "p.adjusted", "sig")
colnames(pp_factor) <- cols; colnames(pp_cov) <- cols; colnames(pp_fac.cov) <- cols; 

# save raw results
pairwise_aitchison_raw[[paste(level, segment)]] <- rbind(pp_factor,pp_cov,pp_fac.cov)
# see the results
knitr::kable(pp_factor,digits = 3,caption = "PERMANOVA, GROUP separation")
PERMANOVA, GROUP separation
pairs Df SumsOfSqs F.Model R2 p.value p.adjusted sig
no_ibd vs ibd 1 165.293 0.975 0.005 0.496 0.496
knitr::kable(pp_cov,digits = 3,caption = "PERMANOVA, COUNTRY separation")
PERMANOVA, COUNTRY separation
pairs Df SumsOfSqs F.Model R2 p.value p.adjusted sig
no_ibd vs ibd , Country 1 609.984 3.599 0.017 0.001 0.001 ***
knitr::kable(pp_fac.cov,digits = 3,caption = "PERMANOVA, INTERACTION GROUP:Country")
PERMANOVA, INTERACTION GROUP:Country
pairs Df SumsOfSqs F.Model R2 p.value p.adjusted sig
no_ibd vs ibd : Country 1 185.468 1.095 0.005 0.259 0.259

Interaction check

interaction_sig <- pp_fac.cov$pairs[pp_fac.cov$p.adjusted < 0.1]

if (length(interaction_sig)>0){
  for (i in 1:length(interaction_sig)){
  group1 <- unlist(strsplit(interaction_sig[i],split = " vs "))[1]
  group2 <- unlist(strsplit(interaction_sig[i],split = " vs "))[2]
  group2 <- unlist(strsplit(group2,split = " : "))[1]
  
  result_list <- adonis_postanalysis(x=pairwise_df,
                      factors = filt_ileum_metadata$Group,
                      covariate = filt_ileum_metadata$Country, 
                      group1 = group1,
                      group2 = group2)
  print(result_list)
}
}
2.3.1.0.2 Plots

PCA

p <- pca_plot_custom(filt_ileum_genus_tab,
                                 filt_ileum_genus_taxa,
                                 filt_ileum_metadata,
                                 show_boxplots = TRUE,
                                 variable = "Group", size=3, show_legend=FALSE)

# save the results
pca_plots_list[[paste(segment,level,"custom")]] <- p

# see the results
p

2.3.1.1 PCA + correlations

variables= c("Calprotectin","Nancy_max","eMAYO","MAYO_dai")
metadata_clinical <- metadata_final
metadata_clinical$PatientID <- as.character(metadata_clinical$PatientID)

metadata_ileum_pca <- merge(filt_ileum_metadata,metadata_clinical[,c("SampleID",variables)],by="SampleID",all.x=TRUE) %>% dplyr::select(SampleID, Group, Country,variables) 
Warning: Using an external vector in selections was deprecated in tidyselect 1.1.0.
ℹ Please use `all_of()` or `any_of()` instead.
  # Was:
  data %>% select(variables)

  # Now:
  data %>% select(all_of(variables))

See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
res <- pca_plot_corr(filt_ileum_genus_tab,
                     filt_ileum_genus_taxa,
                     filt_ileum_metadata,
                     show_boxplots = FALSE,
                     variable = "Group", size=3, 
                     segment=segment,
                     show_legend= TRUE,
                     clinical = TRUE, clinical_metadata = metadata_ileum_pca,
                     variables= c("Calprotectin","Nancy_max","eMAYO","MAYO_dai"))

res
[[1]]
                 variable          r         p     p.adj
Calprotectin Calprotectin 0.12134529 0.1027229 0.4108915
Nancy_max       Nancy_max 0.02160555 0.8285032 0.8285032
eMAYO               eMAYO 0.08919279 0.3702805 0.4937074
MAYO_dai         MAYO_dai 0.10595651 0.2867767 0.4937074

[[2]]
                 variable          r         p     p.adj
Calprotectin Calprotectin 0.02112069 0.7771752 0.7771752
Nancy_max       Nancy_max 0.03027382 0.7614588 0.7771752
eMAYO               eMAYO 0.04867828 0.6253437 0.7771752
MAYO_dai         MAYO_dai 0.08530684 0.3915737 0.7771752

[[3]]
                                       r            p   p_adjusted
Barnesiella                  -0.28802148 2.660865e-05 3.628452e-05
Butyricimonas                 0.04762419 4.942348e-01 4.942348e-01
Odoribacter                  -0.16878430 1.489364e-02 1.595747e-02
Alistipes                    -0.52423416 0.000000e+00 0.000000e+00
Parabacteroides              -0.29219274 2.010965e-05 3.016447e-05
Enterococcus                  0.58191580 0.000000e+00 0.000000e+00
Coprococcus                  -0.36086396 1.060335e-07 2.053779e-07
Fusicatenibacter             -0.36520459 7.295307e-08 2.053779e-07
Lachnoclostridium             0.21660586 1.711591e-03 2.139489e-03
Lachnospiraceae_FCS020_group -0.29541587 1.614984e-05 2.691641e-05
Oscillibacter                -0.20365066 3.224972e-03 3.721122e-03
Faecalibacterium             -0.36187478 9.723698e-08 2.053779e-07
Pseudomonas                   0.50726238 0.000000e+00 0.000000e+00
Enterorhabdus                 0.49231489 0.000000e+00 0.000000e+00
Holdemania                    0.36048391 1.095349e-07 2.053779e-07

[[4]]
                                        r            p   p_adjusted
Barnesiella                  -0.230021123 8.548387e-04 2.988334e-03
Butyricimonas                -0.216541849 1.717109e-03 3.679520e-03
Odoribacter                  -0.172090114 1.302112e-02 2.143583e-02
Alistipes                    -0.223637537 1.195333e-03 2.988334e-03
Parabacteroides              -0.117146202 9.195283e-02 1.253902e-01
Enterococcus                 -0.060688739 3.835622e-01 4.794528e-01
Coprococcus                   0.308484422 6.465693e-06 3.232847e-05
Fusicatenibacter              0.224160279 1.163353e-03 2.988334e-03
Lachnoclostridium             0.455714956 4.924212e-12 7.386319e-11
Lachnospiraceae_FCS020_group -0.003417826 9.608962e-01 9.608962e-01
Oscillibacter                -0.169807119 1.429055e-02 2.143583e-02
Faecalibacterium              0.353934967 1.905132e-07 1.428849e-06
Pseudomonas                   0.190446091 5.929778e-03 1.111833e-02
Enterorhabdus                -0.038188166 5.836507e-01 6.253401e-01
Holdemania                   -0.043982333 5.278616e-01 6.090710e-01

heatmap

df_heatmap <- melt(res[[4]] %>% rownames_to_column("SeqID") %>% 
                     mutate(PCo2=r) %>% select(SeqID,PCo2))
Using SeqID as id variables
labels_df <- melt(res[[4]] %>% rownames_to_column("SeqID") %>% select(SeqID,p_adjusted))
Using SeqID as id variables
labels <- labels_df
labels$variable <- "PCo2"
labels$value <- NA
labels$value[labels_df$value<=0.001] <- "***"
labels$value[labels_df$value<=0.01] <- "**"
labels$value[labels_df$value<=0.05] <- "*"

p_mdi_ileum <- ggplot() + 
  geom_tile(data=df_heatmap, aes(variable, SeqID, fill= value))  + 
  theme_minimal() + 
  scale_fill_gradient2(name = "Rho", low = "blue", mid = "white", high = "red", midpoint = 0,limits = c(-0.7, 0.7)) + 
  geom_text(data=labels,aes(x=variable,y=SeqID,label=value,vjust=0.6)) +
  xlab("") + ylab("")  

p_mdi_ileum
Warning: Removed 5 rows containing missing values or values outside the scale range
(`geom_text()`).

2.3.1.2 Saving results

write.xlsx(pairwise_aitchison_raw[[paste(level, segment)]], 
           file = file.path(path,
           paste0("beta_diversity_results_", segment,".xlsx")))

2.3.2 Supplementary analysis

supplements_beta <- list()

2.3.2.1 Genus level

level="genus"
2.3.2.1.1 Bray-Curtis

PERMANOVA

pairwise_df <- filt_ileum_genus_tab %>% column_to_rownames("SeqID") %>% t()

# main effect
pp_main <- pairwise.adonis(pairwise_df,filt_ileum_metadata$Group,covariate = filt_ileum_metadata$Country, sim.method = "bray", p.adjust.m="BH")

# interaction
pp_int <- pairwise.adonis(pairwise_df,filt_ileum_metadata$Group,covariate = filt_ileum_metadata$Country, interaction = TRUE, sim.method = "bray", p.adjust.m="BH")

# tidy the results
pp_factor <- pp_main[[1]]
pp_cov <- pp_main[[2]]
pp_fac.cov <- pp_int[[3]]

cols <- c("pairs","Df","SumsOfSqs", "F.Model","R2","p.value", "p.adjusted", "sig")
colnames(pp_factor) <- cols; colnames(pp_cov) <- cols; colnames(pp_fac.cov) <- cols; 

# save raw results
supplements_beta[[paste("bray",level,segment)]] <- rbind(pp_factor,pp_cov,pp_fac.cov)
# see the results
knitr::kable(pp_factor,digits = 3,caption = "PERMANOVA, GROUP separation")
PERMANOVA, GROUP separation
pairs Df SumsOfSqs F.Model R2 p.value p.adjusted sig
no_ibd vs ibd 1 0.198 0.893 0.004 0.581 0.581
knitr::kable(pp_cov,digits = 3,caption = "PERMANOVA, COUNTRY separation")
PERMANOVA, COUNTRY separation
pairs Df SumsOfSqs F.Model R2 p.value p.adjusted sig
no_ibd vs ibd , Country 1 1.523 6.858 0.032 0.001 0.001 ***
knitr::kable(pp_fac.cov,digits = 3,caption = "PERMANOVA, INTERACTION GROUP:Country")
PERMANOVA, INTERACTION GROUP:Country
pairs Df SumsOfSqs F.Model R2 p.value p.adjusted sig
no_ibd vs ibd : Country 1 0.262 1.179 0.006 0.264 0.264

Interaction check

interaction_sig <- pp_fac.cov$pairs[pp_fac.cov$p.adjusted < 0.05]

if (length(interaction_sig)>0){
  for (i in 1:length(interaction_sig)){
  group1 <- unlist(strsplit(interaction_sig[i],split = " vs "))[1]
  group2 <- unlist(strsplit(interaction_sig[i],split = " vs "))[2]
  group2 <- unlist(strsplit(group2,split = " : "))[1]
  
  result_list <- adonis_postanalysis(x=pairwise_df,
                      factors = filt_ileum_metadata$Group,
                      covariate = filt_ileum_metadata$Country, 
                      group1 = group1,
                      group2 = group2,
                      sim.method = 'bray')
  print(result_list)
}
}

Plots

p <- pca_plot_custom(filt_ileum_genus_tab,
                                 filt_ileum_genus_taxa,
                                 filt_ileum_metadata,
                                 measure = "bray",
                                 show_boxplots = TRUE,
                                 variable = "Group", size=3, show_legend=TRUE)

# save the results
supplements_beta[[paste("PCoA bray",level,segment)]] <- p

# see the results
p

2.3.2.1.2 Jaccard

PERMANOVA

pairwise_df <- filt_ileum_genus_tab %>% column_to_rownames("SeqID") %>% t()

# main effect
pp_main <- pairwise.adonis(pairwise_df,filt_ileum_metadata$Group,covariate = filt_ileum_metadata$Country, sim.method = "jaccard", p.adjust.m="BH")

# interaction
pp_int <- pairwise.adonis(pairwise_df,filt_ileum_metadata$Group,covariate = filt_ileum_metadata$Country, interaction = TRUE, sim.method = "jaccard", p.adjust.m="BH")

# tidy the results
pp_factor <- pp_main[[1]]
pp_cov <- pp_main[[2]]
pp_fac.cov <- pp_int[[3]]

cols <- c("pairs","Df","SumsOfSqs", "F.Model","R2","p.value", "p.adjusted", "sig")
colnames(pp_factor) <- cols; colnames(pp_cov) <- cols; colnames(pp_fac.cov) <- cols; 

# save raw results
supplements_beta[[paste("jaccard",level,segment)]] <- rbind(pp_factor, pp_cov, pp_fac.cov)
# see the results
knitr::kable(pp_factor,digits = 3,caption = "PERMANOVA, GROUP separation")
PERMANOVA, GROUP separation
pairs Df SumsOfSqs F.Model R2 p.value p.adjusted sig
no_ibd vs ibd 1 0.267 0.87 0.004 0.704 0.704
knitr::kable(pp_cov,digits = 3,caption = "PERMANOVA, COUNTRY separation")
PERMANOVA, COUNTRY separation
pairs Df SumsOfSqs F.Model R2 p.value p.adjusted sig
no_ibd vs ibd , Country 1 1.569 5.123 0.024 0.001 0.001 ***
knitr::kable(pp_fac.cov,digits = 3,caption = "PERMANOVA, INTERACTION GROUP:Country")
PERMANOVA, INTERACTION GROUP:Country
pairs Df SumsOfSqs F.Model R2 p.value p.adjusted sig
no_ibd vs ibd : Country 1 0.344 1.123 0.005 0.26 0.26

Interaction check

interaction_sig <- pp_fac.cov$pairs[pp_fac.cov$p.adjusted < 0.05]

if (length(interaction_sig)>0){
for (i in 1:length(interaction_sig)){
  group1 <- unlist(strsplit(interaction_sig[i],split = " vs "))[1]
  group2 <- unlist(strsplit(interaction_sig[i],split = " vs "))[2]
  group2 <- unlist(strsplit(group2,split = " : "))[1]
  
  result_list <- adonis_postanalysis(x=pairwise_df,
                      factors = filt_ileum_metadata$Group,
                      covariate = filt_ileum_metadata$Country, 
                      group1 = group1,
                      group2 = group2,
                      sim.method = 'jaccard')
  print(result_list)
}
}

Plots

p <- pca_plot_custom(filt_ileum_genus_tab,
                                 filt_ileum_genus_taxa,
                                 filt_ileum_metadata,
                                 measure = "jaccard",
                                 show_boxplots = TRUE,
                                 variable = "Group", size=3, show_legend=TRUE)

# save the results
supplements_beta[[paste("PCoA jaccard",level,segment)]] <- p

# see the results
p

2.3.2.2 ASV level

level="ASV"
2.3.2.2.1 Aitchison

PERMANOVA

# preparing data frame
pairwise_df <- filt_ileum_asv_tab %>% column_to_rownames("SeqID") %>% t()

# main effect
pp_main <- pairwise.adonis(pairwise_df,filt_ileum_metadata$Group,covariate = filt_ileum_metadata$Country, sim.method = "robust.aitchison", p.adjust.m="BH")

# interaction
pp_int <- pairwise.adonis(pairwise_df,filt_ileum_metadata$Group,covariate = filt_ileum_metadata$Country, interaction = TRUE, sim.method = "robust.aitchison", p.adjust.m="BH")

pp_factor <- pp_main[[1]]
pp_cov <- pp_main[[2]]
pp_fac.cov <- pp_int[[3]]

cols <- c("pairs","Df","SumsOfSqs", "F.Model","R2","p.value", "p.adjusted", "sig")
colnames(pp_factor) <- cols; colnames(pp_cov) <- cols; colnames(pp_fac.cov) <- cols; 

# save raw results
supplements_beta[[paste("aitchison",level,segment)]] <- rbind(pp_factor, 
                                                            pp_cov, 
                                                            pp_fac.cov)
# see the results
knitr::kable(pp_factor,digits = 3,caption = "PERMANOVA, GROUP separation")
PERMANOVA, GROUP separation
pairs Df SumsOfSqs F.Model R2 p.value p.adjusted sig
no_ibd vs ibd 1 252.554 1.038 0.005 0.327 0.327
knitr::kable(pp_cov,digits = 3,caption = "PERMANOVA, COUNTRY separation")
PERMANOVA, COUNTRY separation
pairs Df SumsOfSqs F.Model R2 p.value p.adjusted sig
no_ibd vs ibd , Country 1 600.047 2.467 0.012 0.001 0.001 ***
knitr::kable(pp_fac.cov,digits = 3,caption = "PERMANOVA, INTERACTION GROUP:Country")
PERMANOVA, INTERACTION GROUP:Country
pairs Df SumsOfSqs F.Model R2 p.value p.adjusted sig
no_ibd vs ibd : Country 1 331.978 1.367 0.007 0.019 0.019 *

Interaction check

interaction_sig <- pp_fac.cov$pairs[pp_fac.cov$p.adjusted < 0.05]

if (length(interaction_sig)>0){
for (i in 1:length(interaction_sig)){
  group1 <- unlist(strsplit(interaction_sig[i],split = " vs "))[1]
  group2 <- unlist(strsplit(interaction_sig[i],split = " vs "))[2]
  group2 <- unlist(strsplit(group2,split = " : "))[1]
  
  result_list <- adonis_postanalysis(x=pairwise_df,
                      factors = filt_ileum_metadata$Group,
                      covariate = filt_ileum_metadata$Country, 
                      group1 = group1,
                      group2 = group2)
  print(result_list)
}
}
$no_ibd_ibd_CZ
Permutation test for adonis under reduced model
Terms added sequentially (first to last)
Permutation: free
Number of permutations: 999

adonis2(formula = x1 ~ Fac, data = x2, permutations = perm)
          Df SumOfSqs      R2      F Pr(>F) p.adjusted
Fac        1    322.6 0.01055 1.3221  0.031   0.041333
Residual 124  30261.4 0.98945                         
Total    125  30584.1 1.00000                         

$no_ibd_ibd_NO
Permutation test for adonis under reduced model
Terms added sequentially (first to last)
Permutation: free
Number of permutations: 999

adonis2(formula = x1 ~ Fac, data = x2, permutations = perm)
         Df SumOfSqs     R2      F Pr(>F) p.adjusted
Fac       1    261.9 0.0134 1.0867  0.225      0.225
Residual 80  19278.7 0.9866                         
Total    81  19540.6 1.0000                         

$no_ibd_CZ_vs_NO
Permutation test for adonis under reduced model
Terms added sequentially (first to last)
Permutation: free
Number of permutations: 999

adonis2(formula = x1 ~ Cov, data = x2, permutations = perm)
         Df SumOfSqs     R2      F Pr(>F) p.adjusted
Cov       1    381.9 0.0475 1.5458  0.005       0.01
Residual 31   7658.1 0.9525                         
Total    32   8040.0 1.0000                         

$ibd_CZ_vs_NO
Permutation test for adonis under reduced model
Terms added sequentially (first to last)
Permutation: free
Number of permutations: 999

adonis2(formula = x1 ~ Cov, data = x2, permutations = perm)
          Df SumOfSqs      R2      F Pr(>F) p.adjusted
Cov        1      550 0.01297 2.2725  0.001      0.004
Residual 173    41882 0.98703                         
Total    174    42432 1.00000                         

PCoA

p <- pca_plot_custom(filt_ileum_asv_tab,
                           filt_ileum_taxa_tab,
                           filt_ileum_metadata,
                           show_boxplots = TRUE,
                           variable = "Group", size=3, show_legend=TRUE)

# save the results
supplements_beta[[paste("PCoA aitchison",level,segment)]] <- p

# see the results
p

2.3.2.2.2 Bray-Curtis

PERMANOVA

# preparing data frame
pairwise_df <- filt_ileum_asv_tab %>% column_to_rownames("SeqID") %>% t()

# main effect
pp_main <- pairwise.adonis(pairwise_df,filt_ileum_metadata$Group,covariate = filt_ileum_metadata$Country, sim.method = "bray", p.adjust.m="BH")

# interaction
pp_int <- pairwise.adonis(pairwise_df,filt_ileum_metadata$Group,covariate = filt_ileum_metadata$Country, interaction = TRUE, sim.method = "bray", p.adjust.m="BH")

pp_factor <- pp_main[[1]]
pp_cov <- pp_main[[2]]
pp_fac.cov <- pp_int[[3]]

cols <- c("pairs","Df","SumsOfSqs", "F.Model","R2","p.value", "p.adjusted", "sig")
colnames(pp_factor) <- cols; colnames(pp_cov) <- cols; colnames(pp_fac.cov) <- cols; 

# save raw results
supplements_beta[[paste("bray",level,segment)]] <- rbind(pp_factor, 
                                                            pp_cov, 
                                                            pp_fac.cov)
# see the results
knitr::kable(pp_factor,digits = 3,caption = "PERMANOVA, GROUP separation")
PERMANOVA, GROUP separation
pairs Df SumsOfSqs F.Model R2 p.value p.adjusted sig
no_ibd vs ibd 1 0.321 0.952 0.005 0.573 0.573
knitr::kable(pp_cov,digits = 3,caption = "PERMANOVA, COUNTRY separation")
PERMANOVA, COUNTRY separation
pairs Df SumsOfSqs F.Model R2 p.value p.adjusted sig
no_ibd vs ibd , Country 1 1.545 4.585 0.022 0.001 0.001 ***
knitr::kable(pp_fac.cov,digits = 3,caption = "PERMANOVA, INTERACTION GROUP:Country")
PERMANOVA, INTERACTION GROUP:Country
pairs Df SumsOfSqs F.Model R2 p.value p.adjusted sig
no_ibd vs ibd : Country 1 0.489 1.455 0.007 0.053 0.053

Interaction check

interaction_sig <- pp_fac.cov$pairs[pp_fac.cov$p.adjusted < 0.05]

if (length(interaction_sig)>0){
for (i in 1:length(interaction_sig)){
  group1 <- unlist(strsplit(interaction_sig[i],split = " vs "))[1]
  group2 <- unlist(strsplit(interaction_sig[i],split = " vs "))[2]
  group2 <- unlist(strsplit(group2,split = " : "))[1]
  
  result_list <- adonis_postanalysis(x=pairwise_df,
                      factors = filt_ileum_metadata$Group,
                      covariate = filt_ileum_metadata$Country, 
                      group1 = group1,
                      group2 = group2,
                      sim.method = 'bray')
  print(result_list)
}
}

PCoA

p <- pca_plot_custom(filt_ileum_asv_tab,
                     filt_ileum_taxa_tab,
                     filt_ileum_metadata,
                     measure = "bray",
                     show_boxplots = TRUE,
                     variable = "Group", size=3, show_legend=TRUE)

# save the results
supplements_beta[[paste("PCoA bray",level,segment)]] <- p

# see the results
p

2.3.2.2.3 Jaccard

PERMANOVA

# preparing data frame
pairwise_df <- filt_ileum_asv_tab %>% column_to_rownames("SeqID") %>% t()

# main effect
pp_main <- pairwise.adonis(pairwise_df,filt_ileum_metadata$Group,covariate = filt_ileum_metadata$Country, sim.method = "jaccard", p.adjust.m="BH")

# interaction
pp_int <- pairwise.adonis(pairwise_df,filt_ileum_metadata$Group,covariate = filt_ileum_metadata$Country, interaction = TRUE, sim.method = "jaccard", p.adjust.m="BH")

pp_factor <- pp_main[[1]]
pp_cov <- pp_main[[2]]
pp_fac.cov <- pp_int[[3]]

cols <- c("pairs","Df","SumsOfSqs", "F.Model","R2","p.value", "p.adjusted", "sig")
colnames(pp_factor) <- cols; colnames(pp_cov) <- cols; colnames(pp_fac.cov) <- cols; 

# save raw results
supplements_beta[[paste("jaccard",level,segment)]] <- rbind(pp_factor, 
                                                            pp_cov, 
                                                            pp_fac.cov)
# see the results
knitr::kable(pp_factor,digits = 3,caption = "PERMANOVA, GROUP separation")
PERMANOVA, GROUP separation
pairs Df SumsOfSqs F.Model R2 p.value p.adjusted sig
no_ibd vs ibd 1 0.381 0.95 0.005 0.637 0.637
knitr::kable(pp_cov,digits = 3,caption = "PERMANOVA, COUNTRY separation")
PERMANOVA, COUNTRY separation
pairs Df SumsOfSqs F.Model R2 p.value p.adjusted sig
no_ibd vs ibd , Country 1 1.278 3.185 0.015 0.001 0.001 ***
knitr::kable(pp_fac.cov,digits = 3,caption = "PERMANOVA, INTERACTION GROUP:Country")
PERMANOVA, INTERACTION GROUP:Country
pairs Df SumsOfSqs F.Model R2 p.value p.adjusted sig
no_ibd vs ibd : Country 1 0.514 1.284 0.006 0.051 0.051

Interaction check

interaction_sig <- pp_fac.cov$pairs[pp_fac.cov$p.adjusted < 0.05]

if (length(interaction_sig)>0){
for (i in 1:length(interaction_sig)){
  group1 <- unlist(strsplit(interaction_sig[i],split = " vs "))[1]
  group2 <- unlist(strsplit(interaction_sig[i],split = " vs "))[2]
  group2 <- unlist(strsplit(group2,split = " : "))[1]
  
  result_list <- adonis_postanalysis(x=pairwise_df,
                      factors = filt_ileum_metadata$Group,
                      covariate = filt_ileum_metadata$Country, 
                      group1 = group1,
                      group2 = group2,
                      sim.method = 'jaccard')
  print(result_list)
}
}

PCoA

p <- pca_plot_custom(filt_ileum_asv_tab,
                     filt_ileum_taxa_tab,
                     filt_ileum_metadata,
                     measure = "jaccard",
                     show_boxplots = TRUE,
                     variable = "Group", size=3, show_legend=TRUE)

# save the results
supplements_beta[[paste("PCoA jaccard",level,segment)]] <- p

# see the results
p

2.3.2.3 Saving results

write.xlsx(supplements_beta[!grepl("PCoA",names(supplements_beta))],
           file = file.path(path,
           paste0("supplements_beta_diversity_", segment,".xlsx")))

2.4 Univariate Analysis

2.4.1 Main analysis

level="genus"
# needed paths
path = "../results/Q3/univariate_analysis"
path_maaslin=file.path("../intermediate_files/maaslin/Q3",level)
# variables
raw_linda_results_genus <- list();
raw_linda_results_genus[[segment]] <- list()
linda_results_genus <- list(); 
linda_results_genus[[segment]] <- list()

# country and interaction problems
list_intersections <- list()
list_venns <- list()
uni_statistics <- list()

# workbook for final df
wb <- createWorkbook()

# PSC effect
psc_effect <- list()

Aggregate taxa

genus_data <- aggregate_taxa(ileum_asv_tab,
                             ileum_taxa_tab,
                             taxonomic_level = level)

ileum_genus_tab <- genus_data[[1]]
ileum_genus_taxa_tab <- genus_data[[2]]

ileum_genus_asv_taxa_tab <- create_asv_taxa_table(ileum_genus_tab,
                                                  ileum_genus_taxa_tab)

2.4.1.1 ibd vs no_ibd

group <- c("no_ibd","ibd")
comparison_name <- paste0(group[1], " vs ",group[2])
2.4.1.1.1 linDA
# prepare the data
linda_data <- binomial_prep(ileum_genus_tab,
                            ileum_genus_taxa_tab,
                            ileum_metadata,
                            group, usage="linDA")
Removing 6 ASV(s)
filt_ileum_uni_data <- linda_data[[1]]
filt_ileum_uni_taxa <- linda_data[[2]]
filt_ileum_uni_metadata <- linda_data[[3]]

# fit the model
linda.obj <- linda(filt_ileum_uni_data, 
                   filt_ileum_uni_metadata, 
                   formula = '~ Group * Country')
0  features are filtered!
The filtered data has  208  samples and  184  features will be tested!
Imputation approach is used.
Fit linear models ...
Completed.
linda.output <- linda.obj$output
linda.output <- linda_renaming(linda.output, group)

# save the results
group1 <- paste0(group[1], " vs ","Group",group[2])
group2 <- paste0(group[1], " , ",group[2], " - CZ vs NO") 
group3 <- paste0(group[1], " vs ","Group",group[2], ":CountryNO")

for (grp in c(group1,group2,group3)){
  raw_linda_results_genus[[segment]][[grp]] <- 
    rawlinda.df(linda.output,
                grp,
                filt_ileum_uni_data,
                filt_ileum_uni_taxa)
  
  linda_results_genus[[segment]][[grp]] <- 
    linda.df(linda.output,
             grp,
             filt_ileum_uni_data,
             filt_ileum_uni_taxa)
}
# volcano plot
volcano_1 <- volcano_plot_linda(linda.output, 
                                group1, 
                                taxa_table = filt_ileum_uni_taxa) + 
            ggtitle(comparison_title)

volcano_2  <- volcano_plot_linda(linda.output, group2, 
                                taxa_table = filt_ileum_uni_taxa) + 
            ggtitle("NO vs CZ")

volcano_3 <- volcano_plot_linda(linda.output, group3, 
                                taxa_table = filt_ileum_uni_taxa) +
            ggtitle("Interaction effect")

volcano <- ggarrange(volcano_1,volcano_2,volcano_3, ncol=3)
volcano

2.4.1.1.2 MaAsLin2
volcano1 <- volcano_plot_maaslin(fit_data,filt_ileum_uni_taxa) + 
            ggtitle(comparison_title)

volcano2 <- volcano_plot_maaslin(fit_data,filt_ileum_uni_taxa,variable="Country") + 
            ggtitle("Country effect")

volcano <- ggarrange(volcano1,volcano2, ncol=2)
volcano

2.4.1.1.3 Group - Intersection
intersection_results <- group_intersection(group, list_intersections, list_venns,
                                           linda.output, fit_data,
                                           raw_linda_results_genus, 
                                           segment = segment,
                                           level=level)

list_intersections <- intersection_results[[1]]
list_venns <- intersection_results[[2]]
venn <- intersection_results[[3]]

# show the results
venn

2.4.1.1.4 Interaction effect
list_interaction_significant <- country_interaction(group,
                                                    linda.output, 
                                                    list_intersections,
                                          filt_ileum_uni_data,
                                          filt_ileum_uni_metadata,
                                          segment=segment,
                                          level=level)

# see the result
## significant interaction effect
list_interaction_significant[[1]]
NULL
## results for czech cohort
list_interaction_significant[[2]]
[1] NA
## results for norwegian cohort
list_interaction_significant[[3]]
[1] NA

Removing problematic taxa

list_intersections <- removing_interaction_problems(group,
                                                    list_interaction_significant,
                                                    list_intersections,
                                                    segment=segment,
                                                    level=level)
2.4.1.1.5 Basic statistics
uni_df <-  merge(basic_univariate_statistics(linda_data,group),
                 raw_linda_results_genus[[segment]][[group1]],
                 by="SeqID",all=TRUE)
[1] "no_ibd"
[1] "ibd"
[1] "no_ibd"
[1] "ibd"
uni_df[["final_sig"]] <- uni_df$SeqID %in% list_intersections[[paste(segment,level,comparison_name)]][["SeqID"]]
uni_statistics[[segment]][[paste(level,comparison_name)]] <- uni_df

# for comparison
new_name <- comparison_name
addWorksheet(wb, sheetName = new_name)
writeData(wb, sheet = new_name, uni_df, rowNames=FALSE)

2.4.1.2 Visualization

Heatmap visualizing the linDA’s logFoldChange for taxa with p < 0.1.

list_heatmap <- list_intersections[grep(paste(segment,level),
                                  names(list_intersections),value=TRUE)]

if (length((list_heatmap[[1]][[1]]))>1){
  p_heatmap_linda <- heatmap_linda(list_heatmap,ileum_taxa_tab)
  p_heatmap_linda
}

Dotheatmap

if (length((list_heatmap[[1]][[1]]))>0){
dotheatmap_linda <- dot_heatmap_linda(list_heatmap,
                                      uni_statistics$terminal_ileum[grepl(level,names(uni_statistics$terminal_ileum))],
                                      ileum_taxa_tab)
dotheatmap_linda

}

Horizontal bar plot

if (length((list_heatmap[[1]][[1]]))>0){
p_prevalence <- horizontal_barplot(wb,taxa=levels(dotheatmap_linda$data$SeqID))

p_prevalence_final <- ggarrange(p_prevalence,
                                ggplot() + theme_minimal(),
                                nrow = 2,heights = c(1,0.085))
p <- ggarrange(dotheatmap_linda + theme(legend.position = "none"),p_prevalence_final,ncol=2,widths = c(1,0.2))
p
}

2.4.1.3 Saving results

# ALL DATA
saveWorkbook(wb,file.path(path,paste0("uni_analysis_wb_",segment,".xlsx")),
             overwrite = TRUE)

# PSC effect
write.xlsx(psc_effect[[paste(segment,level)]],file.path(path,paste0("psc_effect_",segment,".xlsx")))

# SIGNIFICANT taxa

write.xlsx(list_intersections[grepl(segment,names(list_intersections))] %>%
            `names<-`(gsub(segment, "", names(
              list_intersections[grepl(segment,names(list_intersections))]))),
           file.path(path,paste0("significant_taxa_",segment,".xlsx")))

2.4.2 Supplementary Analysis

supplements_uni <- list()
supplements_wb <- createWorkbook()

2.4.2.1 ASV level

level="ASV"
path_maaslin="../intermediate_files/maaslin/Q3/ASV/"
raw_linda_results <- list();
raw_linda_results[[segment]] <- list()
linda_results <- list(); 
linda_results[[segment]] <- list()
2.4.2.1.1 ibd vs no_ibd
group <- c("no_ibd","ibd")
comparison_name <- paste0(group[1], " vs ",group[2])

linDA

# prepare the data
linda_data <- binomial_prep(ileum_asv_tab,
                            ileum_taxa_tab,
                            ileum_metadata,
                            group, usage="linDA")
Removing 68 ASV(s)
filt_ileum_uni_data <- linda_data[[1]]
filt_ileum_uni_taxa <- linda_data[[2]]
filt_ileum_uni_metadata <- linda_data[[3]]

# fit the model
linda.obj <- linda(filt_ileum_uni_data, 
                   filt_ileum_uni_metadata, 
                   formula = '~ Group * Country')
0  features are filtered!
The filtered data has  208  samples and  449  features will be tested!
Warning in linda(filt_ileum_uni_data, filt_ileum_uni_metadata, formula = "~ Group * Country"): Some features have less than 3 nonzero values! 
                        They have virtually no statistical power. You may consider filtering them in the analysis!
Imputation approach is used.
Fit linear models ...
Completed.
linda.output <- linda.obj$output
linda.output <- linda_renaming(linda.output, group)

# save the results
group1 <- paste0(group[1], " vs ","Group",group[2])
group2 <- paste0(group[1], " , ",group[2], " - CZ vs NO") 
group3 <- paste0(group[1], " vs ","Group",group[2], ":CountryNO")

for (grp in c(group1,group2,group3)){
  raw_linda_results[[segment]][[grp]] <- 
    rawlinda.df(linda.output,
                grp,
                filt_ileum_uni_data,
                filt_ileum_uni_taxa)
  
  linda_results[[segment]][[grp]] <- 
    linda.df(linda.output,
             grp,
             filt_ileum_uni_data,
             filt_ileum_uni_taxa)
}
# volcano plot
volcano_1 <- volcano_plot_linda(linda.output, 
                                group1, 
                                taxa_table = filt_ileum_uni_taxa) + 
            ggtitle(comparison_title)

volcano_2  <- volcano_plot_linda(linda.output, group2, 
                                taxa_table = filt_ileum_uni_taxa) + 
            ggtitle("NO vs CZ")

volcano_3 <- volcano_plot_linda(linda.output, group3, 
                                taxa_table = filt_ileum_uni_taxa) +
            ggtitle("Interaction effect")

volcano <- ggarrange(volcano_1,volcano_2,volcano_3, ncol=3)
volcano

MaAsLin2

volcano1 <- volcano_plot_maaslin(fit_data,filt_ileum_uni_taxa) + 
            ggtitle(comparison_title)

volcano2 <- volcano_plot_maaslin(fit_data,filt_ileum_uni_taxa,variable="Country") + 
            ggtitle("Country effect")

volcano <- ggarrange(volcano1,volcano2, ncol=2)
volcano

Group - Intersection

intersection_results <- group_intersection(group, list_intersections, list_venns,
                                           linda.output, fit_data,
                                           raw_linda_results, 
                                           segment = segment,
                                           level=level)

list_intersections <- intersection_results[[1]]
list_venns <- intersection_results[[2]]
venn <- intersection_results[[3]]

# show the results
venn

Interaction effect

list_interaction_significant <- country_interaction(group,
                                                    linda.output, 
                                                    list_intersections,
                                          filt_ileum_uni_data,
                                          filt_ileum_uni_metadata,
                                          segment=segment,
                                          level=level)

# see the result
## significant interaction effect
list_interaction_significant[[1]]
NULL
## results for czech cohort
list_interaction_significant[[2]]
[1] NA
## results for norwegian cohort
list_interaction_significant[[3]]
[1] NA

Removing problematic taxa

list_intersections <- removing_interaction_problems(group,
                                                    list_interaction_significant,
                                                    list_intersections,
                                                    segment=segment,
                                                    level=level)

Basic statistics

uni_df <-  merge(basic_univariate_statistics(linda_data,group),
                 raw_linda_results[[segment]][[group1]],
                 by="SeqID",all=TRUE)
[1] "no_ibd"
[1] "ibd"
[1] "no_ibd"
[1] "ibd"
uni_df[["final_sig"]] <- uni_df$SeqID %in% list_intersections[[paste(segment,level,comparison_name)]][["SeqID"]]
uni_statistics[[segment]][[paste(level,comparison_name)]] <- uni_df

# for comparison
new_name <- paste(level,comparison_name)
addWorksheet(supplements_wb, sheetName = new_name)
writeData(supplements_wb, sheet = new_name, uni_df, rowNames=FALSE)
2.4.2.1.2 Visualization

Heatmap visualizing the linDA’s logFoldChange for taxa with p < 0.1.

list_heatmap <- list_intersections[grep(paste(segment,level),
                                  names(list_intersections),value=TRUE)]

if (length((list_heatmap[[1]][[1]]))>1){
p_heatmap_linda <- heatmap_linda(list_heatmap,ileum_taxa_tab)
p_heatmap_linda
}

Dot heatmap

if (length((list_heatmap[[1]][[1]]))>0){
dotheatmap_linda <- dot_heatmap_linda(list_heatmap,
                                      uni_statistics$terminal_ileum[grepl(level,names(uni_statistics$terminal_ileum))],
                                      ileum_taxa_tab)
dotheatmap_linda
}

Horizontal bar plot

if (length((list_heatmap[[1]][[1]]))>0){
p_prevalence <- horizontal_barplot(wb,taxa=levels(dotheatmap_linda$data$SeqID))

p_prevalence_final <- ggarrange(p_prevalence,
                                ggplot() + theme_minimal(),
                                nrow = 2,heights = c(1,0.085))
p <- ggarrange(dotheatmap_linda + theme(legend.position = "none"),p_prevalence_final,ncol=2,widths = c(1,0.2))
p
}

2.4.2.2 Phylum level

level="phylum"
path_maaslin="../intermediate_files/maaslin/Q3/Phylum/"
raw_linda_results_phylum <- list();
raw_linda_results_phylum[[segment]] <- list()
linda_results_phylum <- list(); 
linda_results_phylum[[segment]] <- list()

Aggregate taxa

phylum_data <- aggregate_taxa(ileum_asv_tab,
                             ileum_taxa_tab,
                             taxonomic_level = "Phylum")

ileum_phylum_tab <- phylum_data[[1]]
ileum_phylum_taxa_tab <- phylum_data[[2]]
2.4.2.2.1 ibd vs no_ibd
group <- c("no_ibd","ibd")
comparison_name <- paste0(group[1], " vs ",group[2])

linDA

# prepare the data
linda_data <- binomial_prep(ileum_phylum_tab,
                            ileum_phylum_taxa_tab,
                            ileum_metadata,
                            group, usage="linDA")

filt_ileum_uni_data <- linda_data[[1]]
filt_ileum_uni_taxa <- linda_data[[2]]
filt_ileum_uni_metadata <- linda_data[[3]]

# fit the model
linda.obj <- linda(filt_ileum_uni_data, 
                   filt_ileum_uni_metadata, 
                   formula = '~ Group * Country')
0  features are filtered!
The filtered data has  208  samples and  10  features will be tested!
Imputation approach is used.
Fit linear models ...
Completed.
linda.output <- linda.obj$output
linda.output <- linda_renaming(linda.output, group)

# save the results
group1 <- paste0(group[1], " vs ","Group",group[2])
group2 <- paste0(group[1], " , ",group[2], " - CZ vs NO") 
group3 <- paste0(group[1], " vs ","Group",group[2], ":CountryNO")

for (grp in c(group1,group2,group3)){
  raw_linda_results_phylum[[segment]][[grp]] <- 
    rawlinda.df(linda.output,
                grp,
                filt_ileum_uni_data,
                filt_ileum_uni_taxa)
  
  linda_results_phylum[[segment]][[grp]] <- 
    linda.df(linda.output,
             grp,
             filt_ileum_uni_data,
             filt_ileum_uni_taxa)
}
# volcano plot
volcano_1 <- volcano_plot_linda(linda.output, 
                                group1, 
                                taxa_table = filt_ileum_uni_taxa) + 
            ggtitle(comparison_title)
Using Phylum for naming
volcano_2  <- volcano_plot_linda(linda.output, group2, 
                                taxa_table = filt_ileum_uni_taxa) + 
            ggtitle("NO vs CZ")
Using Phylum for naming
volcano_3 <- volcano_plot_linda(linda.output, group3, 
                                taxa_table = filt_ileum_uni_taxa) +
            ggtitle("Interaction effect")
Using Phylum for naming
volcano <- ggarrange(volcano_1,volcano_2,volcano_3, ncol=3)
volcano

MaAsLin2

volcano1 <- volcano_plot_maaslin(fit_data,filt_ileum_uni_taxa) + 
            ggtitle(comparison_title)

volcano2 <- volcano_plot_maaslin(fit_data,filt_ileum_uni_taxa,variable="Country") + 
            ggtitle("Country effect")

volcano <- ggarrange(volcano1,volcano2, ncol=2)
volcano

Group - Intersection

intersection_results <- group_intersection(group, list_intersections, list_venns,
                                           linda.output, fit_data,
                                           raw_linda_results_phylum, 
                                           segment = segment,
                                           level=level)

list_intersections <- intersection_results[[1]]
list_venns <- intersection_results[[2]]
venn <- intersection_results[[3]]

# show the results
venn

Interaction effect

list_interaction_significant <- country_interaction(group,
                                                    linda.output, 
                                                    list_intersections,
                                          filt_ileum_uni_data,
                                          filt_ileum_uni_metadata,
                                          segment=segment,
                                          level=level)

# see the result
## significant interaction effect
list_interaction_significant[[1]]
NULL
## results for czech cohort
list_interaction_significant[[2]]
[1] NA
## results for norwegian cohort
list_interaction_significant[[3]]
[1] NA

Removing problematic taxa

list_intersections <- removing_interaction_problems(group,
                                                    list_interaction_significant,
                                                    list_intersections,
                                                    segment=segment,
                                                    level=level)

Basic statistics

uni_df <-  merge(basic_univariate_statistics(linda_data,group),
                 raw_linda_results_phylum[[segment]][[group1]],
                 by="SeqID",all=TRUE)
[1] "no_ibd"
[1] "ibd"
[1] "no_ibd"
[1] "ibd"
uni_df[["final_sig"]] <- uni_df$SeqID %in% list_intersections[[paste(segment,level,comparison_name)]][["SeqID"]]
uni_statistics[[segment]][[paste(level,comparison_name)]] <- uni_df

# for comparison
new_name <- paste(level,comparison_name)
addWorksheet(supplements_wb, sheetName = new_name)
writeData(supplements_wb, sheet = new_name, uni_df, rowNames=FALSE)
2.4.2.2.2 Visualization

Heatmap visualizing the linDA’s logFoldChange for taxa with p < 0.1.

list_heatmap <- list_intersections[grep(paste(segment,level),
                                  names(list_intersections),value=TRUE)]

if (length((list_heatmap[[1]][[1]]))>1){
p_heatmap_linda <- heatmap_linda(list_heatmap,ileum_taxa_tab)
p_heatmap_linda
}

Dot heatmap

if (length((list_heatmap[[1]][[1]]))>1){
dotheatmap_linda <- dot_heatmap_linda(list_heatmap,
                                      uni_statistics$terminal_ileum[grepl(level,names(uni_statistics$terminal_ileum))],
                                      ileum_taxa_tab)
dotheatmap_linda
}

2.4.2.3 Saving results

# ALL DATA
saveWorkbook(supplements_wb,file.path(path,paste0("supplements_uni_analysis_wb_",segment,".xlsx")),overwrite = TRUE)

# SIGNIFICANT taxa
write.xlsx(list_intersections[grepl(segment,names(list_intersections))] %>%
            `names<-`(gsub(segment, "", names(
              list_intersections[grepl(segment,names(list_intersections))]))),
           file.path(path,paste0("supplements_significant_taxa_",segment,".xlsx")))

2.5 Results overview

2.5.0.1 Alpha diversity

knitr::kable(pc_observed[[segment]],
             digits = 3,
             caption = "Results of linear model testing ASV Richness")
Results of linear model testing ASV Richness
Estimate Std. Error t value Pr(>|t|)
no_ibd vs Groupibd -5.135 13.890 -0.370 0.712
no_ibd , ibd - CZ vs NO 0.000 17.652 0.000 1.000
no_ibd vs Groupibd:CountryNO -11.094 19.350 -0.573 0.567
knitr::kable(pc_shannon[[segment]],
             digits = 3,
             caption = "Results of linear model testing Shannon index")
Results of linear model testing Shannon index
Estimate Std. Error t value Pr(>|t|)
no_ibd vs Groupibd 0.034 0.182 0.187 0.852
no_ibd , ibd - CZ vs NO -0.070 0.231 -0.302 0.763
no_ibd vs Groupibd:CountryNO -0.171 0.254 -0.676 0.500
knitr::kable(pc_simpson[[segment]],
             digits = 3,
             caption = "Results of linear model testing Simpson index")
Results of linear model testing Simpson index
Estimate Std. Error t value Pr(>|t|)
no_ibd vs Groupibd 0.010 0.026 0.401 0.689
no_ibd , ibd - CZ vs NO 0.004 0.033 0.125 0.900
no_ibd vs Groupibd:CountryNO -0.031 0.036 -0.853 0.395
knitr::kable(pc_pielou[[segment]],
             digits = 3,
             caption = "Results of linear model testing Pielou index")
Results of linear model testing Pielou index
Estimate Std. Error t value Pr(>|t|)
no_ibd vs Groupibd 0.016 0.027 0.593 0.554
no_ibd , ibd - CZ vs NO -0.014 0.034 -0.419 0.676
no_ibd vs Groupibd:CountryNO -0.025 0.037 -0.674 0.501

Plots

alpha_div_plots[[paste(segment,"Country")]]

2.5.0.2 Beta diversity

Main results

knitr::kable(pairwise_aitchison_raw[[paste("genus", segment)]],
             digits = 3,
             caption = "Results of PERMANOVA - robust aitchison distance")
Results of PERMANOVA - robust aitchison distance
pairs Df SumsOfSqs F.Model R2 p.value p.adjusted sig
no_ibd vs ibd 1 165.293 0.975 0.005 0.496 0.496
no_ibd vs ibd , Country 1 609.984 3.599 0.017 0.001 0.001 ***
no_ibd vs ibd : Country 1 185.468 1.095 0.005 0.259 0.259

PCA

pca_plots_list[[paste(segment,"genus custom")]]

Supplements

knitr::kable(supplements_beta[!grepl("PCoA",names(supplements_beta)) & (grepl("genus",names(supplements_beta)))],
             digits = 3,
             caption = "Supplementary PERMANOVA results: Bray-curtis, Jaccard distances")
Supplementary PERMANOVA results: Bray-curtis, Jaccard distances
pairs Df SumsOfSqs F.Model R2 p.value p.adjusted sig
no_ibd vs ibd 1 0.198 0.893 0.004 0.581 0.581
no_ibd vs ibd , Country 1 1.523 6.858 0.032 0.001 0.001 ***
no_ibd vs ibd : Country 1 0.262 1.179 0.006 0.264 0.264
pairs Df SumsOfSqs F.Model R2 p.value p.adjusted sig
no_ibd vs ibd 1 0.267 0.870 0.004 0.704 0.704
no_ibd vs ibd , Country 1 1.569 5.123 0.024 0.001 0.001 ***
no_ibd vs ibd : Country 1 0.344 1.123 0.005 0.260 0.260

PCA

ggarrange(plotlist = supplements_beta[grepl("PCoA",names(supplements_beta))],
          labels=names(supplements_beta[grepl("PCoA",names(supplements_beta))]),
          font.label = list(size=5,face="plain"),
          ncol=2,nrow=3)

2.5.0.3 Univariate analysis

Number of significant taxa

knitr::kable(as.data.frame(lapply(list_intersections,nrow)) %>% t() %>% 
  `colnames<-`("Count") %>% 
  `rownames<-`(c(names(list_intersections))),caption="Number of significant taxa")
Number of significant taxa
Count
terminal_ileum genus no_ibd vs ibd 0
terminal_ileum ASV no_ibd vs ibd 0
terminal_ileum phylum no_ibd vs ibd 0

3 Data Analysis - Colon

segment="colon"

3.1 Filtering

Rules: - prevalence > 5% (per group) - nearZeroVar with default settings - sequencing depth > 5000 - taxonomic assignment at least order

Library size

read_counts(colon_asv_tab, line = c(5000,10000))
Scale for y is already present.
Adding another scale for y, which will replace the existing scale.

3.1.1 Sequencing depth

data_filt <- seq_depth_filtering(colon_asv_tab,
                                 colon_taxa_tab,
                                 colon_metadata,
                                 seq_depth_threshold = 10000)
Removing 52 ASV(s)
filt_colon_asv_tab <- data_filt[[1]]; alpha_colon_asv_tab <- filt_colon_asv_tab
filt_colon_taxa_tab <- data_filt[[2]]; alpha_colon_taxa_tab <- filt_colon_taxa_tab
filt_colon_metadata <- data_filt[[3]]; alpha_colon_metadata <- filt_colon_metadata

seq_step <- dim(filt_colon_asv_tab)[1]

Library size

read_counts(filt_colon_asv_tab,line = c(10000))
Scale for y is already present.
Adding another scale for y, which will replace the existing scale.

3.1.2 NearZeroVar

data_filt <- nearzerovar_filtering(filt_colon_asv_tab,
                                   filt_colon_taxa_tab,
                                   filt_colon_metadata)

filt_colon_asv_tab <- data_filt[[1]]
filt_colon_taxa_tab <- data_filt[[2]]
nearzero_step <- dim(filt_colon_asv_tab)[1]

Library size

read_counts(filt_colon_asv_tab,line = c(5000,10000))
Scale for y is already present.
Adding another scale for y, which will replace the existing scale.

Check zero depth

data_filt <- check_zero_depth(filt_colon_asv_tab, 
                              filt_colon_taxa_tab, 
                              filt_colon_metadata)

filt_colon_asv_tab <- data_filt[[1]]; 
filt_colon_taxa_tab <- data_filt[[2]]; 
filt_colon_metadata <- data_filt[[3]]; 

Library size

read_counts(filt_colon_asv_tab,line = c(5000,10000))
Scale for y is already present.
Adding another scale for y, which will replace the existing scale.

3.1.3 Final Counts

final_counts_filtering(colon_asv_tab,
                       filt_colon_asv_tab,
                       filt_colon_metadata,
                       seq_step, 0, nearzero_step)
                                               V1
Raw data: ASVs                               5779
Raw data: Samples                             616
Sequencing depth filt: ASVs                  5727
Prevalence filt: ASVs                           0
NearZeroVar filt: ASVs                        306
Filt data: ASVs                               306
Filt data: Samples                            599
Filt data: Patients                           263
Filt data: Patients.1                           0
Filtered samples                               17
Matrices                    CA;CD;SI;Cecum;Rectum
healthy                                         0
non-rPSC                                        0
rPSC                                            0
pre_ltx                                         0
post_ltx                                        0
ETOH                                            0

3.2 Alpha diversity

path = "../results/Q3/alpha_diversity"

Calculation

# Construct MPSE object
alpha_colon_metadata$Sample <- alpha_colon_metadata$SampleID
colon_mpse <- as.MPSE(construct_phyloseq(alpha_colon_asv_tab,
                                         alpha_colon_taxa_tab,
                                         alpha_colon_metadata))

colon_mpse %<>% mp_rrarefy(raresize = 10000,seed = 123)

# Calculate alpha diversity - rarefied counts
colon_mpse %<>% mp_cal_alpha(.abundance=RareAbundance, force=TRUE)
alpha_data <- data.frame(SampleID=colon_mpse$Sample.x,
                         Observe=colon_mpse$Observe,
                         Shannon=colon_mpse$Shannon,
                         Simpson=colon_mpse$Simpson,
                         Pielou=colon_mpse$Pielou,
                         Group=colon_mpse$Group,
                         Country=colon_mpse$Country,
                         Patient=colon_mpse$Patient)

write.csv(alpha_data,file.path(path,paste0("alpha_indices_",segment,".csv")),
          row.names = FALSE)

3.2.1 Plots

p_boxplot_alpha <- alpha_diversity_countries(alpha_data)
Using SampleID, Group, Country, Patient as id variables
Scale for y is already present.
Adding another scale for y, which will replace the existing scale.
Using SampleID, Group, Country, Patient as id variables

Scale for y is already present.
Adding another scale for y, which will replace the existing scale.
# save the results
alpha_div_plots[[paste(segment,"Country")]] <- p_boxplot_alpha

# see the results
p_boxplot_alpha

3.2.2 Linear Model

path = "../results/Q3/alpha_diversity"
alpha_data <- read.csv(file.path(path,paste0("alpha_indices_",segment,".csv")))

Richness

results_model <- pairwise.lmer(
  formula = "Observe ~ Group * Country + (1|Patient)",
  factors=alpha_data$Group,
  data=alpha_data)

# check interaction
if (!is.data.frame(results_model)){
  results_model_observe <- results_model[[1]]
  results_model_observe_detailed <- results_model[[2]]
} else {
  results_model_observe <- results_model
  results_model_observe_detailed <- NA
}

# save the results
pc_observed[[segment]] <- results_model_observe
# see the results
knitr::kable(results_model_observe,digits = 3,
caption = "Raw results of linear model of richness estimation.")
Raw results of linear model of richness estimation.
Estimate Std..Error df t.value Pr…t.. p.adj sig
no_ibd vs Groupibd -2.810 11.952 439.998 -0.235 0.814 0.814
no_ibd vs ibd - CZ vs NO -10.684 15.292 332.353 -0.699 0.485 0.485
no_ibd vs Groupibd:CountryNO -5.680 16.562 341.720 -0.343 0.732 0.732
knitr::kable(results_model_observe_detailed,digits = 3,
caption = "Raw results of independent country analysis")

Table: Raw results of independent country analysis

Shannon

results_model <- pairwise.lmer(
  formula = "Shannon ~ Group * Country + (1|Patient)",
  factors=alpha_data$Group,
  data=alpha_data)

# check interaction
if (!is.data.frame(results_model)){
  results_model_shannon <- results_model[[1]]
  results_model_shannon_detailed <- results_model[[2]]
} else {
  results_model_shannon <- results_model
  results_model_shannon_detailed <- NA
}

# save the results
pc_shannon[[segment]] <- as.data.frame(results_model_shannon)
# see the results
knitr::kable(results_model_shannon,digits = 3,
caption = "Raw results of linear model of Shannon estimation.")
Raw results of linear model of Shannon estimation.
Estimate Std..Error df t.value Pr…t.. p.adj sig
no_ibd vs Groupibd 0.033 0.158 456.049 0.211 0.833 0.833
no_ibd vs ibd - CZ vs NO -0.138 0.203 336.238 -0.678 0.498 0.498
no_ibd vs Groupibd:CountryNO -0.119 0.220 346.763 -0.540 0.589 0.589
knitr::kable(results_model_shannon_detailed,digits = 3,
caption = "Raw results of independent country analysis")

Table: Raw results of independent country analysis

Simpson

results_model <- pairwise.lmer(
  formula = "Simpson ~ Group * Country + (1|Patient)",
  factors=alpha_data$Group,
  data=alpha_data)

# check interaction
if (!is.data.frame(results_model)){
  results_model_simpson <- results_model[[1]]
  results_model_simpson_detailed <- results_model[[2]]
} else {
  results_model_simpson <- results_model
  results_model_simpson_detailed <- NA
}

# save the results
pc_simpson[[segment]] <- as.data.frame(results_model_simpson)
# see the results
knitr::kable(results_model_simpson,digits = 3,
caption = "Raw results of linear model of Simpson estimation.")
Raw results of linear model of Simpson estimation.
Estimate Std..Error df t.value Pr…t.. p.adj sig
no_ibd vs Groupibd -0.007 0.026 453.407 -0.261 0.794 0.794
no_ibd vs ibd - CZ vs NO -0.025 0.033 334.532 -0.744 0.457 0.457
no_ibd vs Groupibd:CountryNO 0.005 0.036 344.950 0.131 0.896 0.896
knitr::kable(results_model_simpson_detailed,digits = 3,
caption = "Raw results of independent country analysis")

Table: Raw results of independent country analysis

3.2.3 Saving results

alpha_list <- list(
  Richness=pc_observed[[segment]] %>% rownames_to_column("Comparison"),
  Shannon=pc_shannon[[segment]] %>% rownames_to_column("Comparison"),
  Simpson=pc_simpson[[segment]] %>% rownames_to_column("Comparison"))
                   
write.xlsx(alpha_list, 
           file = file.path(path,paste0("alpha_diversity_results_",segment,".xlsx")))

3.3 Beta diversity

Calculating Aitchison distance (euclidean distance on clr-transformed data), both at ASV and genus level.

3.3.1 Main analysis - Genus, Aitchison

Genus level, Aitchison distance

level="genus"
path = "../results/Q3/beta_diversity"

Aggregation, filtering

genus_data <- aggregate_taxa(colon_asv_tab,
                             colon_taxa_tab,
                             taxonomic_level=level,
                             names=TRUE)

filt_data <- filtering_steps(genus_data[[1]],
                             genus_data[[2]],
                             colon_metadata,
                            seq_depth_threshold=10000)
Removing 10 ASV(s)
filt_colon_genus_tab <- filt_data[[1]]
filt_colon_genus_taxa <- filt_data[[2]]
filt_colon_genus_metadata <- filt_data[[3]]
3.3.1.0.1 PERMANOVA
pairwise_df <- filt_colon_genus_tab %>% column_to_rownames("SeqID") %>% t()

# main effect
pp_main <- pairwise.adonis(pairwise_df,filt_colon_genus_metadata$Group,
                           covariate = filt_colon_genus_metadata$Country, 
                           patients = filt_colon_genus_metadata$Patient,
                           sim.method = "robust.aitchison", p.adjust.m="BH")

# interaction
pp_int <- pairwise.adonis(pairwise_df,filt_colon_genus_metadata$Group,
                          covariate = filt_colon_genus_metadata$Country, 
                          interaction = TRUE, 
                          patients = filt_colon_genus_metadata$Patient,
                          sim.method = "robust.aitchison", p.adjust.m="BH")

# tidy the results
pp_factor <- pp_main[[1]]
pp_cov <- pp_main[[2]]
pp_fac.cov <- pp_int[[3]]

cols <- c("pairs","Df","SumsOfSqs", "F.Model","R2","p.value", "p.adjusted", "sig")
colnames(pp_factor) <- cols; colnames(pp_cov) <- cols; colnames(pp_fac.cov) <- cols; 

# save raw results
pairwise_aitchison_raw[[paste(level, segment)]] <-rbind(pp_factor,pp_cov,pp_fac.cov)
# see the results
knitr::kable(pp_factor,digits = 3,caption = "PERMANOVA, GROUP separation")
PERMANOVA, GROUP separation
pairs Df SumsOfSqs F.Model R2 p.value p.adjusted sig
no_ibd vs ibd 1 172.58 1.118 0.002 1 1
knitr::kable(pp_cov,digits = 3,caption = "PERMANOVA, COUNTRY separation")
PERMANOVA, COUNTRY separation
pairs Df SumsOfSqs F.Model R2 p.value p.adjusted sig
no_ibd vs ibd , Country 1 1345.829 8.716 0.014 0.001 0.001 ***
knitr::kable(pp_fac.cov,digits = 3,caption = "PERMANOVA, INTERACTION GROUP:Country")
PERMANOVA, INTERACTION GROUP:Country
pairs Df SumsOfSqs F.Model R2 p.value p.adjusted sig
no_ibd vs ibd : Country 1 171.139 1.109 0.002 1 1

Interaction check

interaction_sig <- pp_fac.cov$pairs[pp_fac.cov$p.adjusted < 0.1]

if (length(interaction_sig)>0){
 for (i in 1:length(interaction_sig)){
  group1 <- unlist(strsplit(interaction_sig[i],split = " vs "))[1]
  group2 <- unlist(strsplit(interaction_sig[i],split = " vs "))[2]
  group2 <- unlist(strsplit(group2,split = " : "))[1]
  
  result_list <- adonis_postanalysis(x=pairwise_df,
                      factors = filt_colon_genus_metadata$Group,
                      covariate = filt_colon_genus_metadata$Country, 
                      group1 = group1,
                      group2 = group2,
                      patients = filt_colon_genus_metadata$Patient)
  print(result_list)
} 
}
3.3.1.0.2 Plots

PCoA custom

p <- pca_plot_custom(filt_colon_genus_tab,
                                 filt_colon_genus_taxa,
                                 filt_colon_genus_metadata,
                                 show_boxplots = TRUE,
                                 variable = "Group", size=2, 
                                 show_legend=FALSE)

# save the results
pca_plots_list[[paste(segment,level,"custom")]] <- p

# see the results
p

3.3.1.1 PCA + correlations

variables= c("Calprotectin","Nancy_max","eMAYO","MAYO_dai")
metadata_clinical <- metadata_final
metadata_clinical$PatientID <- as.character(metadata_clinical$PatientID)

metadata_colon_pca <- merge(filt_colon_metadata %>% dplyr::mutate(PatientID=Patient),metadata_clinical[,c("SampleID",variables)],by="SampleID",all.x=TRUE) %>% dplyr::select(SampleID, Group, Country,variables,PatientID) 


res <- pca_plot_corr(filt_colon_genus_tab,
                     filt_colon_genus_taxa,
                     filt_colon_metadata,
                     show_boxplots = FALSE,
                     variable = "Group", size=3, 
                     segment=segment,
                     show_legend= FALSE,
                     clinical = TRUE, 
                     clinical_metadata = metadata_colon_pca,
                     variables= c("Calprotectin","Nancy_max","eMAYO","MAYO_dai"))

res
[[1]]
                 variable          r          p      p.adj
Calprotectin Calprotectin 0.15183991 0.02211692 0.08846768
Nancy_max       Nancy_max 0.02893904 0.76623279 0.76623279
eMAYO               eMAYO 0.11425372 0.23903459 0.31871279
MAYO_dai         MAYO_dai 0.15876800 0.10075720 0.20151441

[[2]]
                 variable            r         p     p.adj
Calprotectin Calprotectin -0.036206277 0.5873577 0.9814957
Nancy_max       Nancy_max -0.121916388 0.2087739 0.8350957
eMAYO               eMAYO  0.007247831 0.9406554 0.9814957
MAYO_dai         MAYO_dai  0.002258098 0.9814957 0.9814957

[[3]]
                                        r            p   p_adjusted
Rothia                        0.390506302 7.317003e-11 2.299629e-10
Barnesiella                  -0.173311204 4.871936e-03 6.304859e-03
Butyricimonas                 0.086206373 1.632279e-01 1.795507e-01
Odoribacter                  -0.156940417 1.087274e-02 1.258949e-02
Alistipes                    -0.540791408 0.000000e+00 0.000000e+00
Parabacteroides              -0.160141748 9.343463e-03 1.141979e-02
Enterococcus                  0.557840325 0.000000e+00 0.000000e+00
Coprococcus                  -0.297947401 9.700752e-07 1.778471e-06
Fusicatenibacter             -0.238752134 9.651101e-05 1.415495e-04
Hungatella                    0.498204405 0.000000e+00 0.000000e+00
Lachnoclostridium             0.316787297 1.789829e-07 4.375138e-07
Lachnospiraceae_FCS020_group -0.313187531 2.493911e-07 5.486604e-07
Colidextribacter             -0.319037397 1.451537e-07 3.991725e-07
Intestinimonas               -0.246489288 5.610989e-05 8.817268e-05
Oscillibacter                -0.211212640 5.797319e-04 7.971314e-04
Negativibacillus              0.003083252 9.602874e-01 9.602874e-01
Family_XIII_AD3011_group     -0.273931153 7.118703e-06 1.204704e-05
Dialister                    -0.048107039 4.369693e-01 4.577774e-01
Veillonella                   0.487572002 0.000000e+00 0.000000e+00
Klebsiella                    0.525435178 0.000000e+00 0.000000e+00
Pseudomonas                   0.452466206 0.000000e+00 0.000000e+00
Holdemania                    0.309448577 3.504242e-07 7.008484e-07

[[4]]
                                       r            p   p_adjusted
Rothia                       -0.11694852 5.825392e-02 1.281586e-01
Barnesiella                   0.08788719 1.551534e-01 2.438125e-01
Butyricimonas                 0.06207930 3.156765e-01 4.629923e-01
Odoribacter                   0.13777399 2.552951e-02 6.240547e-02
Alistipes                     0.20587665 8.008037e-04 2.936280e-03
Parabacteroides              -0.05546489 3.700707e-01 5.088472e-01
Enterococcus                  0.10571978 8.705689e-02 1.741138e-01
Coprococcus                  -0.31736054 1.697072e-07 1.244520e-06
Fusicatenibacter             -0.17055712 5.603382e-03 1.540930e-02
Hungatella                   -0.04184159 4.990343e-01 6.099308e-01
Lachnoclostridium            -0.36982398 8.085071e-10 1.134125e-08
Lachnospiraceae_FCS020_group  0.01337197 8.290071e-01 8.290071e-01
Colidextribacter              0.10125124 1.013142e-01 1.837458e-01
Intestinimonas                0.21838051 3.710357e-04 1.632557e-03
Oscillibacter                 0.22796075 1.998482e-04 1.099165e-03
Negativibacillus              0.18107540 3.249534e-03 1.021282e-02
Family_XIII_AD3011_group      0.36763588 1.031023e-09 1.134125e-08
Dialister                     0.01632857 7.919851e-01 8.290071e-01
Veillonella                  -0.09916474 1.085771e-01 1.837458e-01
Klebsiella                    0.02833834 6.471234e-01 7.493008e-01
Pseudomonas                   0.02540813 6.815006e-01 7.496507e-01
Holdemania                    0.05081956 4.115365e-01 5.325767e-01

heatmap

df_heatmap <- melt(res[[3]] %>% rownames_to_column("SeqID") %>% 
                     mutate(PCo1=r) %>% select(SeqID,PCo1))
Using SeqID as id variables
labels_df <- melt(res[[3]] %>% rownames_to_column("SeqID") %>% select(SeqID,p_adjusted))
Using SeqID as id variables
labels <- labels_df
labels$variable <- "PCo1"
labels$value <- NA
labels$value[labels_df$value<=0.001] <- "***"
labels$value[labels_df$value<=0.01] <- "**"
labels$value[labels_df$value<=0.05] <- "*"

p_mdi_colon <- ggplot() + 
  geom_tile(data=df_heatmap, aes(variable, SeqID, fill= value))  + 
  theme_minimal() + 
  scale_fill_gradient2(name = "Rho", low = "blue", mid = "white", high = "red", midpoint = 0,limits = c(-0.7, 0.7)) + 
  geom_text(data=labels,aes(x=variable,y=SeqID,label=value,vjust=0.6)) +
  xlab("") + ylab("")  

p_mdi_colon
Warning: Removed 3 rows containing missing values or values outside the scale range
(`geom_text()`).

Calprotectin

metadata_colon_pca %<>% dplyr::mutate(log_Calprotectin=log(Calprotectin)) %>% `rownames<-`(NULL)

metadata_colon_pca <- metadata_colon_pca[match(filt_colon_metadata$SampleID, metadata_colon_pca$SampleID), ] %>% `rownames<-`(NULL)

p_2 <- pca_plot_custom(filt_colon_genus_tab,
                     filt_colon_genus_taxa,
                     metadata_colon_pca,
                     show_boxplots = FALSE,
                     variable = "log_Calprotectin", size=3, 
                     show_legend= FALSE)

p_2

3.3.1.2 Saving results

write.xlsx(pairwise_aitchison_raw[[paste(level, segment)]], 
           file = file.path(path,
           paste0("beta_diversity_results_", segment,".xlsx")))

3.3.2 Supplementary analysis

3.3.2.1 Genus level

level="genus"
3.3.2.1.1 Bray-Curtis

PERMANOVA

pairwise_df <- filt_colon_genus_tab %>% column_to_rownames("SeqID") %>% t()

# main effect
pp_main <- pairwise.adonis(pairwise_df,
                           filt_colon_genus_metadata$Group,
                           covariate = filt_colon_genus_metadata$Country, 
                           patients = filt_colon_genus_metadata$Patient,
                           sim.method = "bray", p.adjust.m="BH")

# interaction
pp_int <- pairwise.adonis(pairwise_df,
                          filt_colon_genus_metadata$Group,
                          covariate = filt_colon_genus_metadata$Country, 
                          patients = filt_colon_genus_metadata$Patient,
                          interaction = TRUE, sim.method = "bray", p.adjust.m="BH")

# tidy the results
pp_factor <- pp_main[[1]]
pp_cov <- pp_main[[2]]
pp_fac.cov <- pp_int[[3]]

cols <- c("pairs","Df","SumsOfSqs", "F.Model","R2","p.value", "p.adjusted", "sig")
colnames(pp_factor) <- cols; colnames(pp_cov) <- cols; colnames(pp_fac.cov) <- cols; 

# save raw results
supplements_beta[[paste("bray",level,segment)]] <- rbind(pp_factor,pp_cov,pp_fac.cov)
# see the results
knitr::kable(pp_factor,digits = 3,caption = "PERMANOVA, GROUP separation")
PERMANOVA, GROUP separation
pairs Df SumsOfSqs F.Model R2 p.value p.adjusted sig
no_ibd vs ibd 1 0.153 0.709 0.001 1 1
knitr::kable(pp_cov,digits = 3,caption = "PERMANOVA, COUNTRY separation")
PERMANOVA, COUNTRY separation
pairs Df SumsOfSqs F.Model R2 p.value p.adjusted sig
no_ibd vs ibd , Country 1 5.847 27.123 0.043 0.001 0.001 ***
knitr::kable(pp_fac.cov,digits = 3,caption = "PERMANOVA, INTERACTION GROUP:Country")
PERMANOVA, INTERACTION GROUP:Country
pairs Df SumsOfSqs F.Model R2 p.value p.adjusted sig
no_ibd vs ibd : Country 1 0.167 0.774 0.001 0.991 0.991

Interaction check

interaction_sig <- pp_fac.cov$pairs[pp_fac.cov$p.adjusted < 0.05]

if (length(interaction_sig)>0){
for (i in 1:length(interaction_sig)){
  group1 <- unlist(strsplit(interaction_sig[i],split = " vs "))[1]
  group2 <- unlist(strsplit(interaction_sig[i],split = " vs "))[2]
  group2 <- unlist(strsplit(group2,split = " : "))[1]
  
  result_list <- adonis_postanalysis(x=pairwise_df,
                      factors = filt_colon_genus_metadata$Group,
                      covariate = filt_colon_genus_metadata$Country, 
                      patients = filt_colon_genus_metadata$Patient,
                      group1 = group1,
                      group2 = group2,
                      sim.method = 'bray')
  print(result_list)
}
}

Plots

p <- pca_plot_custom(filt_colon_genus_tab,
                                 filt_colon_genus_taxa,
                                 filt_colon_genus_metadata,
                                 measure = "bray",
                                 show_boxplots = TRUE,
                                 variable = "Group", size=3, show_legend=TRUE)

# save the results
supplements_beta[[paste("PCoA bray",level,segment)]] <- p

# see the results
p

3.3.2.1.2 Jaccard

PERMANOVA

pairwise_df <- filt_colon_genus_tab %>% column_to_rownames("SeqID") %>% t()

# main effect
pp_main <- pairwise.adonis(pairwise_df,
                           filt_colon_genus_metadata$Group,
                           covariate = filt_colon_genus_metadata$Country,
                           patients = filt_colon_genus_metadata$Patient,
                           sim.method = "jaccard", p.adjust.m="BH")

# interaction
pp_int <- pairwise.adonis(pairwise_df,
                          filt_colon_genus_metadata$Group,
                          covariate = filt_colon_genus_metadata$Country,
                          patients = filt_colon_genus_metadata$Patient,
                          interaction = TRUE, sim.method = "jaccard", p.adjust.m="BH")

# tidy the results
pp_factor <- pp_main[[1]]
pp_cov <- pp_main[[2]]
pp_fac.cov <- pp_int[[3]]

cols <- c("pairs","Df","SumsOfSqs", "F.Model","R2","p.value", "p.adjusted", "sig")
colnames(pp_factor) <- cols; colnames(pp_cov) <- cols; colnames(pp_fac.cov) <- cols; 

# save raw results
supplements_beta[[paste("jaccard",level,segment)]] <- rbind(pp_factor, 
                                                            pp_cov, 
                                                            pp_fac.cov)
# see the results
knitr::kable(pp_factor,digits = 3,caption = "PERMANOVA, GROUP separation")
PERMANOVA, GROUP separation
pairs Df SumsOfSqs F.Model R2 p.value p.adjusted sig
no_ibd vs ibd 1 0.249 0.827 0.001 1 1
knitr::kable(pp_cov,digits = 3,caption = "PERMANOVA, COUNTRY separation")
PERMANOVA, COUNTRY separation
pairs Df SumsOfSqs F.Model R2 p.value p.adjusted sig
no_ibd vs ibd , Country 1 5.706 18.963 0.031 0.001 0.001 ***
knitr::kable(pp_fac.cov,digits = 3,caption = "PERMANOVA, INTERACTION GROUP:Country")
PERMANOVA, INTERACTION GROUP:Country
pairs Df SumsOfSqs F.Model R2 p.value p.adjusted sig
no_ibd vs ibd : Country 1 0.286 0.952 0.002 1 1

Interaction check

interaction_sig <- pp_fac.cov$pairs[pp_fac.cov$p.adjusted < 0.05]

if (length(interaction_sig)>0){
for (i in 1:length(interaction_sig)){
  group1 <- unlist(strsplit(interaction_sig[i],split = " vs "))[1]
  group2 <- unlist(strsplit(interaction_sig[i],split = " vs "))[2]
  group2 <- unlist(strsplit(group2,split = " : "))[1]
  
  result_list <- adonis_postanalysis(x=pairwise_df,
                      factors = filt_colon_genus_metadata$Group,
                      covariate = filt_colon_genus_metadata$Country, 
                      patients = filt_colon_genus_metadata$Patient,
                      group1 = group1,
                      group2 = group2,
                      sim.method = 'jaccard')
  print(result_list)
}
}

Plots

Custom

p <- pca_plot_custom(filt_colon_genus_tab,
                                 filt_colon_genus_taxa,
                                 filt_colon_genus_metadata,
                                 measure = "jaccard",
                                 show_boxplots = TRUE,
                                 variable = "Group", size=3, show_legend=TRUE)

# save the results
supplements_beta[[paste("PCoA jaccard",level,segment)]] <- p

# see the results
p

3.3.2.2 ASV level

level="ASV"
3.3.2.2.1 Aitchison
# preparing data frame
pairwise_df <- filt_colon_asv_tab %>% column_to_rownames("SeqID") %>% t()

# main effect
pp_main <- pairwise.adonis(x=pairwise_df,
                          filt_colon_metadata$Group,
                           covariate = filt_colon_metadata$Country, 
                           sim.method = "robust.aitchison", 
                           p.adjust.m="BH",
                           patients = filt_colon_metadata$Patient)

# interaction
pp_int <- pairwise.adonis(pairwise_df,filt_colon_metadata$Group,
                          covariate = filt_colon_metadata$Country, 
                          interaction = TRUE, 
                          sim.method = "robust.aitchison", 
                          p.adjust.m="BH",
                          patients = filt_colon_metadata$Patient)

pp_factor <- pp_main[[1]]
pp_cov <- pp_main[[2]]
pp_fac.cov <- pp_int[[3]]

cols <- c("pairs","Df","SumsOfSqs", "F.Model","R2","p.value", "p.adjusted", "sig")
colnames(pp_factor) <- cols; colnames(pp_cov) <- cols; colnames(pp_fac.cov) <- cols; 

# save raw results
supplements_beta[[paste("aitchison",level,segment)]] <- rbind(pp_factor, 
                                                            pp_cov, 
                                                            pp_fac.cov)

# see the results
pp_factor
          pairs Df SumsOfSqs  F.Model          R2 p.value p.adjusted sig
1 no_ibd vs ibd  1  249.7248 1.158657 0.001920557       1          1    
pp_cov
                    pairs Df SumsOfSqs F.Model         R2 p.value p.adjusted
1 no_ibd vs ibd , Country  1  1322.314 6.13519 0.01016952   0.001      0.001
  sig
1 ***
pp_fac.cov
                    pairs Df SumsOfSqs  F.Model          R2 p.value p.adjusted
1 no_ibd vs ibd : Country  1  292.4347 1.357634 0.002249026       1          1
  sig
1    
# see the results
knitr::kable(pp_factor,digits = 3,caption = "PERMANOVA, GROUP separation")
PERMANOVA, GROUP separation
pairs Df SumsOfSqs F.Model R2 p.value p.adjusted sig
no_ibd vs ibd 1 249.725 1.159 0.002 1 1
knitr::kable(pp_cov,digits = 3,caption = "PERMANOVA, COUNTRY separation")
PERMANOVA, COUNTRY separation
pairs Df SumsOfSqs F.Model R2 p.value p.adjusted sig
no_ibd vs ibd , Country 1 1322.314 6.135 0.01 0.001 0.001 ***
knitr::kable(pp_fac.cov,digits = 3,caption = "PERMANOVA, INTERACTION GROUP:Country")
PERMANOVA, INTERACTION GROUP:Country
pairs Df SumsOfSqs F.Model R2 p.value p.adjusted sig
no_ibd vs ibd : Country 1 292.435 1.358 0.002 1 1

Interaction check

interaction_sig <- pp_fac.cov$pairs[pp_fac.cov$p.adjusted < 0.05]

if (length(interaction_sig)>0){
 for (i in 1:length(interaction_sig)){
  group1 <- unlist(strsplit(interaction_sig[i],split = " vs "))[1]
  group2 <- unlist(strsplit(interaction_sig[i],split = " vs "))[2]
  group2 <- unlist(strsplit(group2,split = " : "))[1]
  
  result_list <- adonis_postanalysis(x=pairwise_df,
                      factors = filt_colon_metadata$Group,
                      covariate = filt_colon_metadata$Country, 
                      group1 = group1,
                      group2 = group2,
                      patients = filt_colon_metadata$Patient)
  print(result_list)
} 
}

PCoA

p <- pca_plot_custom(filt_colon_asv_tab,
                           filt_colon_taxa_tab,
                           filt_colon_metadata,
                           show_boxplots = TRUE,
                           variable = "Group", 
                           size=3, 
                           show_legend=TRUE)

# save the results
supplements_beta[[paste("PCoA aitchison",level,segment)]] <- p

# see the results
p

3.3.2.2.2 Bray-Curtis

PERMANOVA

# preparing data frame
pairwise_df <- filt_colon_asv_tab %>% column_to_rownames("SeqID") %>% t()

# main effect
pp_main <- pairwise.adonis(pairwise_df,
                           filt_colon_metadata$Group,
                           covariate = filt_colon_metadata$Country,
                           patients = filt_colon_metadata$Patient,
                           sim.method = "bray", p.adjust.m="BH")

# interaction
pp_int <- pairwise.adonis(pairwise_df,
                          filt_colon_metadata$Group,
                          covariate = filt_colon_metadata$Country, 
                          patients = filt_colon_metadata$Patient,
                          interaction = TRUE, sim.method = "bray", p.adjust.m="BH")

pp_factor <- pp_main[[1]]
pp_cov <- pp_main[[2]]
pp_fac.cov <- pp_int[[3]]

cols <- c("pairs","Df","SumsOfSqs", "F.Model","R2","p.value", "p.adjusted", "sig")
colnames(pp_factor) <- cols; colnames(pp_cov) <- cols; colnames(pp_fac.cov) <- cols; 

# save raw results
supplements_beta[[paste("bray",level,segment)]] <- rbind(pp_factor, 
                                                            pp_cov, 
                                                            pp_fac.cov)
# see the results
knitr::kable(pp_factor,digits = 3,caption = "PERMANOVA, GROUP separation")
PERMANOVA, GROUP separation
pairs Df SumsOfSqs F.Model R2 p.value p.adjusted sig
no_ibd vs ibd 1 0.36 1.086 0.002 1 1
knitr::kable(pp_cov,digits = 3,caption = "PERMANOVA, COUNTRY separation")
PERMANOVA, COUNTRY separation
pairs Df SumsOfSqs F.Model R2 p.value p.adjusted sig
no_ibd vs ibd , Country 1 4.259 12.853 0.021 0.001 0.001 ***
knitr::kable(pp_fac.cov,digits = 3,caption = "PERMANOVA, INTERACTION GROUP:Country")
PERMANOVA, INTERACTION GROUP:Country
pairs Df SumsOfSqs F.Model R2 p.value p.adjusted sig
no_ibd vs ibd : Country 1 0.573 1.731 0.003 0.648 0.648

Interaction check

interaction_sig <- pp_fac.cov$pairs[pp_fac.cov$p.adjusted < 0.05]

if (length(interaction_sig)>0){
for (i in 1:length(interaction_sig)){
  group1 <- unlist(strsplit(interaction_sig[i],split = " vs "))[1]
  group2 <- unlist(strsplit(interaction_sig[i],split = " vs "))[2]
  group2 <- unlist(strsplit(group2,split = " : "))[1]
  
  result_list <- adonis_postanalysis(x=pairwise_df,
                      factors = filt_colon_metadata$Group,
                      covariate = filt_colon_metadata$Country, 
                      group1 = group1,
                      group2 = group2,
                      patients = filt_colon_metadata$Patient,
                      sim.method = 'bray')
  print(result_list)
}
}

PCoA

p <- pca_plot_custom(filt_colon_asv_tab,
                     filt_colon_taxa_tab,
                     filt_colon_metadata,
                     measure = "bray",
                     show_boxplots = TRUE,
                     variable = "Group", size=3, show_legend=TRUE)

# save the results
supplements_beta[[paste("PCoA bray",level,segment)]] <- p

# see the results
p

3.3.2.2.3 Jaccard

PERMANOVA

# preparing data frame
pairwise_df <- filt_colon_asv_tab %>% column_to_rownames("SeqID") %>% t()

# main effect
pp_main <- pairwise.adonis(pairwise_df,
                           filt_colon_metadata$Group,
                           covariate = filt_colon_metadata$Country,
                           patients = filt_colon_metadata$Patient,
                           sim.method = "jaccard", p.adjust.m="BH")

# interaction
pp_int <- pairwise.adonis(pairwise_df,
                          filt_colon_metadata$Group,
                          covariate = filt_colon_metadata$Country, 
                          patients = filt_colon_metadata$Patient,
                          interaction = TRUE, sim.method = "jaccard", p.adjust.m="BH")

pp_factor <- pp_main[[1]]
pp_cov <- pp_main[[2]]
pp_fac.cov <- pp_int[[3]]

cols <- c("pairs","Df","SumsOfSqs", "F.Model","R2","p.value", "p.adjusted", "sig")
colnames(pp_factor) <- cols; colnames(pp_cov) <- cols; colnames(pp_fac.cov) <- cols; 

# save raw results
supplements_beta[[paste("jaccard",level,segment)]] <- rbind(pp_factor, 
                                                            pp_cov, 
                                                            pp_fac.cov)
# see the results
knitr::kable(pp_factor,digits = 3,caption = "PERMANOVA, GROUP separation")
PERMANOVA, GROUP separation
pairs Df SumsOfSqs F.Model R2 p.value p.adjusted sig
no_ibd vs ibd 1 0.436 1.099 0.002 1 1
knitr::kable(pp_cov,digits = 3,caption = "PERMANOVA, COUNTRY separation")
PERMANOVA, COUNTRY separation
pairs Df SumsOfSqs F.Model R2 p.value p.adjusted sig
no_ibd vs ibd , Country 1 3.371 8.495 0.014 0.001 0.001 ***
knitr::kable(pp_fac.cov,digits = 3,caption = "PERMANOVA, INTERACTION GROUP:Country")
PERMANOVA, INTERACTION GROUP:Country
pairs Df SumsOfSqs F.Model R2 p.value p.adjusted sig
no_ibd vs ibd : Country 1 0.57 1.438 0.002 0.992 0.992

Interaction check

interaction_sig <- pp_fac.cov$pairs[pp_fac.cov$p.adjusted < 0.05]

if (length(interaction_sig)>0){
for (i in 1:length(interaction_sig)){
  group1 <- unlist(strsplit(interaction_sig[i],split = " vs "))[1]
  group2 <- unlist(strsplit(interaction_sig[i],split = " vs "))[2]
  group2 <- unlist(strsplit(group2,split = " : "))[1]
  
  result_list <- adonis_postanalysis(x=pairwise_df,
                      factors = filt_colon_metadata$Group,
                      covariate = filt_colon_metadata$Country, 
                      patients = filt_colon_metadata$Patient,
                      group1 = group1,
                      group2 = group2,
                      sim.method = 'jaccard')
  print(result_list)
}
}

PCoA

p <- pca_plot_custom(filt_colon_asv_tab,
                     filt_colon_taxa_tab,
                     filt_colon_metadata,
                     measure = "jaccard",
                     show_boxplots = TRUE,
                     variable = "Group", size=3, show_legend=TRUE)

# save the results
supplements_beta[[paste("PCoA jaccard",level,segment)]] <- p

# see the results
p

3.3.2.3 Saving results

write.xlsx(supplements_beta[!grepl("PCoA",names(supplements_beta))],
           file = file.path(path,
           paste0("supplements_beta_diversity_", segment,".xlsx")))

3.4 Univariate Analysis

3.4.1 Main - Genus level

level="genus"
# needed paths
path = "../results/Q3/univariate_analysis"
path_maaslin=file.path("../intermediate_files/maaslin/Q3",level)
# variables
raw_linda_results_genus[[segment]] <- list()
linda_results_genus[[segment]] <- list()

# country and interaction problems
list_intersections <- list()
list_venns <- list()
uni_statistics <- list()

# workbook for final df
wb <- createWorkbook()

# PSC effect
psc_effect <- list()

3.4.1.1 Genus level

level="genus"

Aggregate taxa

genus_data <- aggregate_taxa(colon_asv_tab,
                             colon_taxa_tab,
                             taxonomic_level = level)

colon_genus_tab <- genus_data[[1]]
colon_genus_taxa_tab <- genus_data[[2]]

colon_genus_asv_taxa_tab <- create_asv_taxa_table(colon_genus_tab,
                                                  colon_genus_taxa_tab)
3.4.1.1.1 ibd vs no_ibd
group <- c("no_ibd","ibd")
comparison_name <- paste0(group[1], " vs ",group[2])
3.4.1.1.1.1 linda
# prepare the data
linda_data <- binomial_prep(colon_genus_tab,
                            colon_genus_taxa_tab,
                            colon_metadata,
                            group, usage="linDA")
Removing 10 ASV(s)
filt_colon_uni_data <- linda_data[[1]]
filt_colon_uni_taxa <- linda_data[[2]]
filt_colon_uni_metadata <- linda_data[[3]]

# fit the model
linda.obj <- linda(filt_colon_uni_data,
                   filt_colon_uni_metadata,
                   formula = '~ Group * Country + (1|Patient)')
0  features are filtered!
The filtered data has  600  samples and  147  features will be tested!
Imputation approach is used.
Fit linear mixed effects models ...
Completed.
linda.output <- linda.obj$output
linda.output <- linda_renaming(linda.output, group)

# save the results
group1 <- paste0(group[1], " vs ","Group",group[2])
group2 <- paste0(group[1], " , ",group[2], " - CZ vs NO") 
group3 <- paste0(group[1], " vs ","Group",group[2], ":CountryNO")


for (grp in c(group1,group2,group3)){
  raw_linda_results_genus[[segment]][[grp]] <- 
    rawlinda.df(linda.output,
                grp,
                filt_colon_uni_data,
                filt_colon_uni_taxa)
  
  linda_results_genus[[segment]][[grp]] <- 
    linda.df(linda.output,
             grp,
             filt_colon_uni_data,
             filt_colon_uni_taxa)
}
# volcano plot
volcano_1 <- volcano_plot_linda(linda.output, group1, 
                                taxa_table = filt_colon_uni_taxa) + 
            ggtitle(paste(group,collapse=" vs "))

volcano_2  <- volcano_plot_linda(linda.output, group2, 
                                taxa_table = filt_colon_uni_taxa) + 
            ggtitle("NO vs CZ")

volcano_3 <- volcano_plot_linda(linda.output, group3, 
                                taxa_table = filt_colon_uni_taxa) +
            ggtitle("Interaction effect")

volcano <- ggarrange(volcano_1,volcano_2,volcano_3, ncol=3)

# see the plot
volcano

3.4.1.1.1.2 MaAsLin2
volcano1 <- volcano_plot_maaslin(fit_data,filt_colon_uni_taxa) + 
            ggtitle(paste(group[1], "vs", group[2]))

volcano2 <- volcano_plot_maaslin(fit_data,filt_colon_uni_taxa,variable="Country") + 
            ggtitle("Country effect")

volcano <- ggarrange(volcano1,volcano2, ncol=2)
volcano

3.4.1.1.1.3 Group - Intersection
intersection_results <- group_intersection(group, list_intersections, list_venns,
                                           linda.output, fit_data,
                                           raw_linda_results_genus, 
                                           segment = segment,
                                           level=level)

list_intersections <- intersection_results[[1]]
list_venns <- intersection_results[[2]]
venn <- intersection_results[[3]]

# show the results
venn

3.4.1.1.1.4 Interaction effect
list_interaction_significant <- country_interaction(group,
                                                    linda.output, 
                                                    list_intersections,
                                          filt_colon_uni_data,
                                          filt_colon_uni_metadata,
                                          segment=segment,
                                          level=level)

# see the result
## significant interaction effect
list_interaction_significant[[1]]
NULL
## results for czech cohort
list_interaction_significant[[2]]
[1] NA
## results for norwegian cohort
list_interaction_significant[[3]]
[1] NA

Removing problematic taxa

list_intersections <- removing_interaction_problems(group,
                                                    list_interaction_significant,
                                                    list_intersections,
                                                    segment=segment,
                                                    level=level)
3.4.1.1.2 Basic statistics
uni_df <-  merge(basic_univariate_statistics(linda_data,group),
                 raw_linda_results_genus[[segment]][[group1]],
                 by="SeqID",all=TRUE)
[1] "no_ibd"
[1] "ibd"
[1] "no_ibd"
[1] "ibd"
uni_df[["final_sig"]] <- uni_df$SeqID %in% list_intersections[[paste(segment,level,comparison_name)]][["SeqID"]]
uni_statistics[[segment]][[paste(level,comparison_name)]] <- uni_df

# for comparison
new_name <- comparison_name
addWorksheet(wb, sheetName = new_name)
writeData(wb, sheet = new_name, uni_df, rowNames=FALSE)
3.4.1.1.3 Visualization

Heatmap visualizing the linDA’s logFoldChange for taxa with p < 0.1.

list_heatmap <- list_intersections[grep(paste(segment,level),
                                  names(list_intersections),value=TRUE)]
if (length((list_heatmap[[1]][[1]]))>1){
p_heatmap_linda <- heatmap_linda(list_heatmap,colon_taxa_tab)
p_heatmap_linda
}

Dot heatmap

if (length((list_heatmap[[1]][[1]]))>0){
dotheatmap_linda <- dot_heatmap_linda(list_heatmap,
                                      uni_statistics$terminal_ileum[grepl(level,names(uni_statistics$terminal_ileum))],
                                      colon_taxa_tab)
dotheatmap_linda
}

Horizontal bar plot

if (length((list_heatmap[[1]][[1]]))>0){
p_prevalence <- horizontal_barplot(wb,taxa=levels(dotheatmap_linda$data$SeqID))

p_prevalence_final <- ggarrange(p_prevalence,
                                ggplot() + theme_minimal(),
                                nrow = 2,heights = c(1,0.085))
p <- ggarrange(dotheatmap_linda + theme(legend.position = "none"),p_prevalence_final,ncol=2,widths = c(1,0.2))
p
}

3.4.1.2 Saving results

# ALL DATA
saveWorkbook(wb,file.path(path,paste0("uni_analysis_wb_",segment,".xlsx")),
             overwrite = TRUE)

# SIGNIFICANT taxa

write.xlsx(list_intersections[grepl(segment,names(list_intersections))] %>%
            `names<-`(gsub(segment, "", names(
              list_intersections[grepl(segment,names(list_intersections))]))),
           file.path(path,paste0("significant_taxa_",segment,".xlsx")))

3.4.2 Supplementary Analysis

3.4.2.1 ASV level

level="ASV"
path_maaslin="../intermediate_files/maaslin/Q3/ASV/"
raw_linda_results[[segment]] <- list()
linda_results[[segment]] <- list()
supplements_wb <- createWorkbook()
3.4.2.1.1 ibd vs no_ibd
group <- c("no_ibd","ibd")
comparison_name <- paste0(group[1], " vs ",group[2])

linDA

# prepare the data
linda_data <- binomial_prep(colon_asv_tab,
                            colon_taxa_tab,
                            colon_metadata,
                            group, usage="linDA")
Removing 7 ASV(s)
Removing 45 ASV(s)
filt_colon_uni_data <- linda_data[[1]]
filt_colon_uni_taxa <- linda_data[[2]]
filt_colon_uni_metadata <- linda_data[[3]]

# fit the model
linda.obj <- linda(filt_colon_uni_data, 
                   filt_colon_uni_metadata, 
                   formula = '~ Group * Country + (1|Patient)')
0  features are filtered!
The filtered data has  599  samples and  306  features will be tested!
Imputation approach is used.
Fit linear mixed effects models ...
Completed.
linda.output <- linda.obj$output
linda.output <- linda_renaming(linda.output, group)

group1 <- paste0(group[1], " vs ","Group",group[2])
group2 <- paste0(group[1], " , ",group[2], " - CZ vs NO") 
group3 <- paste0(group[1], " vs ","Group",group[2], ":CountryNO")

for (grp in c(group1,group2,group3)){
  raw_linda_results[[segment]][[grp]] <- 
    rawlinda.df(linda.output,
                grp,
                filt_colon_uni_data,
                filt_colon_uni_taxa)
  
  linda_results[[segment]][[grp]] <- 
    linda.df(linda.output,
             grp,
             filt_colon_uni_data,
             filt_colon_uni_taxa)
}
# volcano plot
volcano_1 <- volcano_plot_linda(linda.output, group1,
                                taxa_table = filt_colon_uni_taxa) +
              ggtitle(paste(group,collapse=" vs "))

volcano_2  <- volcano_plot_linda(linda.output, group2, 
                                 taxa_table = filt_colon_uni_taxa) + 
              ggtitle("Country effect")

volcano_3 <- volcano_plot_linda(linda.output, group3, 
                                taxa_table = filt_colon_uni_taxa) + 
              ggtitle("Interaction effect")

volcano <- ggarrange(volcano_1,volcano_2,volcano_3, ncol=3)

# see the plot
volcano

MaAsLin2

Volcano plot

volcano1 <- volcano_plot_maaslin(fit_data,filt_colon_uni_taxa) + 
            ggtitle(paste(group[1], "vs", group[2]))

volcano2 <- volcano_plot_maaslin(fit_data,filt_colon_uni_taxa,variable="Country") +
            ggtitle("Country effect")

volcano <- ggarrange(volcano1,volcano2, ncol=2)

# see the results
volcano

Group - Intersection

intersection_results <- group_intersection(group, list_intersections, list_venns,
                                           linda.output, fit_data,
                                           raw_linda_results, 
                                                                                      segment = segment,
                                           level=level)

list_intersections <- intersection_results[[1]]
list_venns <- intersection_results[[2]]
venn <- intersection_results[[3]]

# show the results
venn

Interaction effect

list_interaction_significant <- country_interaction(group,
                                                    linda.output, 
                                                    list_intersections,
                                                    filt_colon_uni_data,
                                                    filt_colon_uni_metadata,
                                                    segment=segment,
                                          level=level)

# see the result
## significant interaction effect
list_interaction_significant[[1]]
NULL
## results for czech cohort
list_interaction_significant[[2]]
[1] NA
## results for norwegian cohort
list_interaction_significant[[3]]
[1] NA

Removing problematic taxa

list_intersections <- removing_interaction_problems(group,
                                                    list_interaction_significant,
                                                    list_intersections,
                                                    segment=segment,
                                                    level=level)

Basic statistics

uni_df <-  merge(basic_univariate_statistics(linda_data,group),
                 raw_linda_results[[segment]][[group1]],
                 by="SeqID",all=TRUE)
[1] "no_ibd"
[1] "ibd"
[1] "no_ibd"
[1] "ibd"
uni_df[["final_sig"]] <- uni_df$SeqID %in% list_intersections[[paste(segment,level,comparison_name)]][["SeqID"]]
uni_statistics[[segment]][[paste(level,comparison_name)]] <- uni_df

# for comparison
new_name <- paste(level,comparison_name)
addWorksheet(supplements_wb, sheetName = new_name)
writeData(supplements_wb, sheet = new_name, uni_df, rowNames=FALSE)
3.4.2.1.2 Visualization

Heatmap visualizing the linDA’s logFoldChange for taxa with p < 0.1.

list_heatmap <- list_intersections[grep(paste(segment,level),
                                  names(list_intersections),value=TRUE)]

if (length((list_heatmap[[1]][[1]]))>1){
p_heatmap_linda <- heatmap_linda(list_heatmap,colon_taxa_tab)
p_heatmap_linda
}

Dot heatmap

if (length((list_heatmap[[1]][[1]]))>0){
dotheatmap_linda <- dot_heatmap_linda(list_heatmap,
                                      uni_statistics$colon[grepl(level,names(uni_statistics$colon))],
                                      colon_taxa_tab)
dotheatmap_linda
}
min_clr -1.225739 
max_clr -1.225739 
min_log 4.018077 
max_log 4.018077 

3.4.2.2 Phylum level

level="phylum"
path_maaslin="../intermediate_files/maaslin/Q3/Phylum/"
raw_linda_results_phylum[[segment]] <- list()
linda_results_phylum[[segment]] <- list()

Aggregate taxa

phylum_data <- aggregate_taxa(colon_asv_tab,
                             colon_taxa_tab,
                             taxonomic_level = "Phylum")

colon_phylum_tab <- phylum_data[[1]]
colon_phylum_taxa_tab <- phylum_data[[2]]
3.4.2.2.1 ibd vs no_ibd
group <- c("no_ibd","ibd")
comparison_name <- paste0(group[1], " vs ",group[2])

linDA

# prepare the data
linda_data <- binomial_prep(colon_phylum_tab,
                            colon_phylum_taxa_tab,
                            colon_metadata,
                            group, usage="linDA")
Removing 1 ASV(s)
filt_colon_uni_data <- linda_data[[1]]
filt_colon_uni_taxa <- linda_data[[2]]
filt_colon_uni_metadata <- linda_data[[3]]

# fit the model
linda.obj <- linda(filt_colon_uni_data, 
                   filt_colon_uni_metadata, 
                   formula = '~ Group * Country + (1|Patient)')
0  features are filtered!
The filtered data has  600  samples and  8  features will be tested!
Imputation approach is used.
Fit linear mixed effects models ...
Completed.
linda.output <- linda.obj$output
linda.output <- linda_renaming(linda.output, group)

group1 <- paste0(group[1], " vs ","Group",group[2])
group2 <- paste0(group[1], " , ",group[2], " - CZ vs NO") 
group3 <- paste0(group[1], " vs ","Group",group[2], ":CountryNO")

for (grp in c(group1,group2,group3)){
  raw_linda_results[[segment]][[grp]] <- 
    rawlinda.df(linda.output,
                grp,
                filt_colon_uni_data,
                filt_colon_uni_taxa)
  
  linda_results[[segment]][[grp]] <- 
    linda.df(linda.output,
             grp,
             filt_colon_uni_data,
             filt_colon_uni_taxa)
}
# volcano plot
volcano_1 <- volcano_plot_linda(linda.output, group1,
                                taxa_table = filt_colon_uni_taxa) +
              ggtitle(paste(group,collapse=" vs "))
Using Phylum for naming
volcano_2  <- volcano_plot_linda(linda.output, group2, 
                                 taxa_table = filt_colon_uni_taxa) + 
              ggtitle("Country effect")
Using Phylum for naming
volcano_3 <- volcano_plot_linda(linda.output, group3, 
                                taxa_table = filt_colon_uni_taxa) + 
              ggtitle("Interaction effect")
Using Phylum for naming
volcano <- ggarrange(volcano_1,volcano_2,volcano_3, ncol=3)

# see the plot
volcano

MaAsLin2

Volcano plot

volcano1 <- volcano_plot_maaslin(fit_data,filt_colon_uni_taxa) + 
            ggtitle(paste(group[1], "vs", group[2]))

volcano2 <- volcano_plot_maaslin(fit_data,filt_colon_uni_taxa,variable="Country") +
            ggtitle("Country effect")

volcano <- ggarrange(volcano1,volcano2, ncol=2)

# see the results
volcano

Group - Intersection

intersection_results <- group_intersection(group, list_intersections, list_venns,
                                           linda.output, fit_data,
                                           raw_linda_results, 
                                                                                      segment = segment,
                                           level=level)

list_intersections <- intersection_results[[1]]
list_venns <- intersection_results[[2]]
venn <- intersection_results[[3]]

# show the results
venn

Interaction effect

list_interaction_significant <- country_interaction(group,
                                                    linda.output, 
                                                    list_intersections,
                                                    filt_colon_uni_data,
                                                    filt_colon_uni_metadata,
                                                    segment=segment,
                                          level=level)

# see the result
## significant interaction effect
list_interaction_significant[[1]]
NULL
## results for czech cohort
list_interaction_significant[[2]]
[1] NA
## results for norwegian cohort
list_interaction_significant[[3]]
[1] NA

Removing problematic taxa

list_intersections <- removing_interaction_problems(group,
                                                    list_interaction_significant,
                                                    list_intersections,
                                                    segment=segment,
                                                    level=level)

Basic statistics

uni_df <-  merge(basic_univariate_statistics(linda_data,group),
                 raw_linda_results[[segment]][[group1]],
                 by="SeqID",all=TRUE)
[1] "no_ibd"
[1] "ibd"
[1] "no_ibd"
[1] "ibd"
uni_df[["final_sig"]] <- uni_df$SeqID %in% list_intersections[[paste(segment,level,comparison_name)]][["SeqID"]]
uni_statistics[[segment]][[paste(level,comparison_name)]] <- uni_df

# for comparison
new_name <- paste(level,comparison_name)
addWorksheet(supplements_wb, sheetName = new_name)
writeData(supplements_wb, sheet = new_name, uni_df, rowNames=FALSE)
3.4.2.2.2 Visualization

Heatmap visualizing the linDA’s logFoldChange for taxa with p < 0.1.

list_heatmap <- list_intersections[grep(paste(segment,level),
                                  names(list_intersections),value=TRUE)]

if (length((list_heatmap[[1]][[1]]))>1){
p_heatmap_linda <- heatmap_linda(list_heatmap,colon_taxa_tab)
p_heatmap_linda
}

Dot heatmap

if (length((list_heatmap[[1]][[1]]))>0){
dotheatmap_linda <- dot_heatmap_linda(list_heatmap,uni_statistics$colon[grepl(level,names(uni_statistics$colon))],colon_taxa_tab)
dotheatmap_linda
}

3.4.2.3 Saving results

# ALL DATA
saveWorkbook(supplements_wb,file.path(path,paste0("supplements_uni_analysis_wb_",segment,".xlsx")),overwrite = TRUE)

# SIGNIFICANT taxa
write.xlsx(list_intersections[grepl(segment,names(list_intersections))] %>%
            `names<-`(gsub(segment, "", names(
              list_intersections[grepl(segment,names(list_intersections))]))),
           file.path(path,paste0("supplements_significant_taxa_",segment,".xlsx")))

3.5 Results overview

3.5.0.1 Alpha diversity

knitr::kable(pc_observed[[segment]],
             digits = 3,
             caption = "Results of linear model testing ASV Richness")
Results of linear model testing ASV Richness
Estimate Std..Error df t.value Pr…t.. p.adj sig
no_ibd vs Groupibd -2.810 11.952 439.998 -0.235 0.814 0.814
no_ibd vs ibd - CZ vs NO -10.684 15.292 332.353 -0.699 0.485 0.485
no_ibd vs Groupibd:CountryNO -5.680 16.562 341.720 -0.343 0.732 0.732
knitr::kable(pc_shannon[[segment]],
             digits = 3,
             caption = "Results of linear model testing Shannon index")
Results of linear model testing Shannon index
Estimate Std..Error df t.value Pr…t.. p.adj sig
no_ibd vs Groupibd 0.033 0.158 456.049 0.211 0.833 0.833
no_ibd vs ibd - CZ vs NO -0.138 0.203 336.238 -0.678 0.498 0.498
no_ibd vs Groupibd:CountryNO -0.119 0.220 346.763 -0.540 0.589 0.589
knitr::kable(pc_simpson[[segment]],
             digits = 3,
             caption = "Results of linear model testing Simpson index")
Results of linear model testing Simpson index
Estimate Std..Error df t.value Pr…t.. p.adj sig
no_ibd vs Groupibd -0.007 0.026 453.407 -0.261 0.794 0.794
no_ibd vs ibd - CZ vs NO -0.025 0.033 334.532 -0.744 0.457 0.457
no_ibd vs Groupibd:CountryNO 0.005 0.036 344.950 0.131 0.896 0.896
knitr::kable(pc_pielou[[segment]],
             digits = 3,
             caption = "Results of linear model testing Pielou index")

Table: Results of linear model testing Pielou index

Plots

alpha_div_plots[[paste(segment,"Country")]]

3.5.0.2 Beta diversity

Main results

knitr::kable(pairwise_aitchison_raw[[paste("genus", segment)]],
             digits = 3,
             caption = "Results of PERMANOVA - robust aitchison distance")
Results of PERMANOVA - robust aitchison distance
pairs Df SumsOfSqs F.Model R2 p.value p.adjusted sig
no_ibd vs ibd 1 172.580 1.118 0.002 1.000 1.000
no_ibd vs ibd , Country 1 1345.829 8.716 0.014 0.001 0.001 ***
no_ibd vs ibd : Country 1 171.139 1.109 0.002 1.000 1.000

PCA

pca_plots_list[[paste(segment,"genus custom")]]

Supplements

knitr::kable(supplements_beta[!grepl("PCoA",names(supplements_beta)) & (grepl("genus",names(supplements_beta)))],
             digits = 3,
             caption = "Supplementary PERMANOVA results: Bray-curtis, Jaccard distances")
Supplementary PERMANOVA results: Bray-curtis, Jaccard distances
pairs Df SumsOfSqs F.Model R2 p.value p.adjusted sig
no_ibd vs ibd 1 0.198 0.893 0.004 0.581 0.581
no_ibd vs ibd , Country 1 1.523 6.858 0.032 0.001 0.001 ***
no_ibd vs ibd : Country 1 0.262 1.179 0.006 0.264 0.264
pairs Df SumsOfSqs F.Model R2 p.value p.adjusted sig
no_ibd vs ibd 1 0.267 0.870 0.004 0.704 0.704
no_ibd vs ibd , Country 1 1.569 5.123 0.024 0.001 0.001 ***
no_ibd vs ibd : Country 1 0.344 1.123 0.005 0.260 0.260
pairs Df SumsOfSqs F.Model R2 p.value p.adjusted sig
no_ibd vs ibd 1 0.153 0.709 0.001 1.000 1.000
no_ibd vs ibd , Country 1 5.847 27.123 0.043 0.001 0.001 ***
no_ibd vs ibd : Country 1 0.167 0.774 0.001 0.991 0.991
pairs Df SumsOfSqs F.Model R2 p.value p.adjusted sig
no_ibd vs ibd 1 0.249 0.827 0.001 1.000 1.000
no_ibd vs ibd , Country 1 5.706 18.963 0.031 0.001 0.001 ***
no_ibd vs ibd : Country 1 0.286 0.952 0.002 1.000 1.000

PCA

plot_list <- supplements_beta[grepl("PCoA",names(supplements_beta)) &
                              grepl(segment,names(supplements_beta))]

ggarrange(plotlist = plot_list,
          labels=names(plot_list),
          font.label = list(size=5,face="plain"),
          ncol=2,nrow=3)

3.5.0.3 Univariate analysis

Number of significant taxa

knitr::kable(as.data.frame(lapply(list_intersections,nrow)) %>% t() %>% 
  `colnames<-`("Count") %>% 
  `rownames<-`(c(names(list_intersections))),caption="Number of significant taxa")
Number of significant taxa
Count
colon genus no_ibd vs ibd 0
colon ASV no_ibd vs ibd 1
colon phylum no_ibd vs ibd 0

4 Paper-ready visualizations

4.1 Alpha diversity

p_A <- alpha_div_plots$`terminal_ileum Country` +
  ggtitle("Terminal ileum")+
  theme(plot.title = element_text(hjust=0.5,face = "bold",size = 15)) 

p_B <-  alpha_div_plots$`colon Country` +
  ggtitle("Colon") +
  theme(plot.title = element_text(hjust=0.5,face = "bold",size = 15)) 

Q3_alpha <- ggarrange(p_A,ggplot() + theme_minimal(),p_B,nrow=1, ncol=3,
                      widths = c(1,0.1,1))
Q3_alpha

4.2 Beta diversity

pca_ti <- pca_plots_list$`terminal_ileum genus custom` 
pca_colon <- pca_plots_list$`colon genus custom` 

genus_Q3_beta <- ggarrange(pca_ti,
                           ggplot() + theme_minimal(),
                           pca_colon,ncol=3,
                           widths = c(1,0.1,1))
genus_Q3_beta

Alpha + Beta diversity

alpha_beta <- ggarrange(Q3_alpha,genus_Q3_beta,
                        ncol = 1,nrow=2,labels = c("A","B"))
alpha_beta

4.3 FIGURE 7

alpha_beta <- ggarrange(alpha_beta,ggplot() + theme_minimal(),ncol=2,
                                widths = c(1,0.15))

alpha_beta

4.4 DAA

if (exists("dot_heatmap_ileum")){
 p_ileum <- dot_heatmap_ileum + 
  ggtitle("Terminal ileum") +
  theme(plot.title = element_text(hjust=0.5,face = "bold",size = 15),
        legend.position = "none") 
}

if (exists("dot_heatmap_colon")){
  p_colon <- dot_heatmap_colon  +
  ggtitle("Colon") +
  theme(plot.title = element_text(hjust=0.5,face = "bold",size = 15),
        legend.position = "none")
}

if (exists("p_ileum") | exists("p_colon")){
  heatmap_plot <- ggarrange(p_ileum,
                          p_colon,
                          ncol = 2,labels=c("A","B"))
  heatmap_plot
}

5 Session info

sessionInfo()
R version 4.3.1 (2023-06-16 ucrt)
Platform: x86_64-w64-mingw32/x64 (64-bit)
Running under: Windows 11 x64 (build 26100)

Matrix products: default


locale:
[1] LC_COLLATE=Czech_Czechia.utf8  LC_CTYPE=Czech_Czechia.utf8   
[3] LC_MONETARY=Czech_Czechia.utf8 LC_NUMERIC=C                  
[5] LC_TIME=Czech_Czechia.utf8    

time zone: Europe/Prague
tzcode source: internal

attached base packages:
[1] parallel  stats     graphics  grDevices utils     datasets  methods  
[8] base     

other attached packages:
 [1] mice_3.16.0              picante_1.8.2            ape_5.8                 
 [4] lubridate_1.9.3          forcats_1.0.0            stringr_1.5.1           
 [7] tidyverse_2.0.0          kableExtra_1.4.0         tidyr_1.3.1             
[10] gbm_2.2.2                doParallel_1.0.17        iterators_1.0.14        
[13] foreach_1.5.2            ranger_0.17.0            ggvenn_0.1.15           
[16] Maaslin2_1.16.0          purrr_1.0.2              pROC_1.18.5             
[19] glmnet_4.1-8             MicrobiomeStat_1.2       caret_6.0-94            
[22] openxlsx_4.2.6.1         magrittr_2.0.3           emmeans_1.10.7          
[25] lmerTest_3.1-3           robustlmm_3.3-1          lme4_1.1-35.5           
[28] Matrix_1.6-5             mgcv_1.9-1               nlme_3.1-164            
[31] pheatmap_1.0.12          reshape2_1.4.4           vegan_2.6-4             
[34] lattice_0.22-6           permute_0.9-7            ggplotify_0.1.2         
[37] ggrepel_0.9.5            ggpubr_0.6.0             MicrobiotaProcess_1.14.1
[40] phyloseq_1.46.0          ggplot2_3.5.1            tibble_3.2.1            
[43] dplyr_1.1.4              cowplot_1.1.3            readr_2.1.5             
[46] igraph_2.0.3             ccrepe_1.38.1            data.table_1.15.4       

loaded via a namespace (and not attached):
  [1] fs_1.6.4                    matrixStats_1.3.0          
  [3] bitops_1.0-7                RColorBrewer_1.1-3         
  [5] numDeriv_2016.8-1.1         tools_4.3.1                
  [7] backports_1.4.1             R6_2.6.1                   
  [9] lazyeval_0.2.2              jomo_2.7-6                 
 [11] rhdf5filters_1.14.1         withr_3.0.2                
 [13] gridExtra_2.3               cli_3.6.2                  
 [15] Biobase_2.62.0              logging_0.10-108           
 [17] biglm_0.9-3                 sandwich_3.1-1             
 [19] labeling_0.4.3              mvtnorm_1.2-4              
 [21] robustbase_0.99-3           pbapply_1.7-2              
 [23] systemfonts_1.1.0           yulab.utils_0.2.0          
 [25] svglite_2.1.3               parallelly_1.38.0          
 [27] rstudioapi_0.17.1           generics_0.1.3             
 [29] gridGraphics_0.5-1          shape_1.4.6.1              
 [31] car_3.1-3                   zip_2.3.1                  
 [33] biomformat_1.30.0           S4Vectors_0.40.2           
 [35] abind_1.4-8                 infotheo_1.2.0.1           
 [37] lifecycle_1.0.4             multcomp_1.4-28            
 [39] yaml_2.3.8                  carData_3.0-5              
 [41] SummarizedExperiment_1.32.0 Rtsne_0.17                 
 [43] rhdf5_2.46.1                recipes_1.1.1              
 [45] SparseArray_1.2.4           grid_4.3.1                 
 [47] mitml_0.4-5                 crayon_1.5.3               
 [49] pillar_1.10.1               knitr_1.50                 
 [51] optparse_1.7.5              GenomicRanges_1.54.1       
 [53] statip_0.2.3                boot_1.3-31                
 [55] estimability_1.5.1          future.apply_1.11.3        
 [57] codetools_0.2-20            pan_1.9                    
 [59] glue_1.7.0                  ggfun_0.1.8                
 [61] vctrs_0.6.5                 treeio_1.26.0              
 [63] gtable_0.3.6                gower_1.0.1                
 [65] xfun_0.51                   S4Arrays_1.2.1             
 [67] prodlim_2024.06.25          libcoin_1.0-10             
 [69] coda_0.19-4.1               pcaPP_2.0-4-1              
 [71] modeest_2.4.0               survival_3.5-8             
 [73] timeDate_4041.110           hardhat_1.4.1              
 [75] lava_1.8.1                  statmod_1.5.0              
 [77] TH.data_1.1-3               ipred_0.9-15               
 [79] ggtree_3.10.1               GenomeInfoDb_1.38.8        
 [81] fBasics_4032.96             rpart_4.1.23               
 [83] colorspace_2.1-0            BiocGenerics_0.48.1        
 [85] DBI_1.2.3                   nnet_7.3-19                
 [87] ade4_1.7-22                 tidyselect_1.2.1           
 [89] timeSeries_4041.111         compiler_4.3.1             
 [91] microbiome_1.24.0           xml2_1.3.6                 
 [93] DelayedArray_0.28.0         scales_1.3.0               
 [95] DEoptimR_1.1-3-1            spatial_7.3-17             
 [97] digest_0.6.35               minqa_1.2.8                
 [99] rmarkdown_2.29              XVector_0.42.0             
[101] htmltools_0.5.8.1           pkgconfig_2.0.3            
[103] MatrixGenerics_1.14.0       stabledist_0.7-2           
[105] fastmap_1.2.0               rlang_1.1.3                
[107] htmlwidgets_1.6.4           farver_2.1.2               
[109] zoo_1.8-12                  jsonlite_1.8.8             
[111] ModelMetrics_1.2.2.2        RCurl_1.98-1.14            
[113] modeltools_0.2-23           Formula_1.2-5              
[115] GenomeInfoDbData_1.2.11     patchwork_1.3.0            
[117] Rhdf5lib_1.24.2             munsell_0.5.1              
[119] Rcpp_1.0.12                 ggnewscale_0.5.1           
[121] fastGHQuad_1.0.1            stringi_1.8.3              
[123] ggstar_1.0.4                stable_1.1.6               
[125] zlibbioc_1.48.2             MASS_7.3-60.0.1            
[127] plyr_1.8.9                  listenv_0.9.1              
[129] Biostrings_2.70.3           splines_4.3.1              
[131] hash_2.2.6.3                multtest_2.58.0            
[133] hms_1.1.3                   ggtreeExtra_1.12.0         
[135] ggsignif_0.6.4              stats4_4.3.1               
[137] rmutil_1.1.10               evaluate_1.0.3             
[139] nloptr_2.1.1                tzdb_0.4.0                 
[141] getopt_1.20.4               future_1.34.0              
[143] clue_0.3-65                 coin_1.4-3                 
[145] broom_1.0.7                 xtable_1.8-4               
[147] tidytree_0.4.6              rstatix_0.7.2              
[149] viridisLite_0.4.2           class_7.3-22               
[151] aplot_0.2.5                 IRanges_2.36.0             
[153] cluster_2.1.6               timechange_0.3.0           
[155] globals_0.16.3